Windshaft-cartodb/lib/models/aggregation/aggregation-query.js

442 lines
15 KiB
JavaScript
Raw Normal View History

'use strict';
2018-09-21 03:12:54 +08:00
const timeDimension = require('./time-dimension');
const DEFAULT_PLACEMENT = 'point-sample';
2019-09-13 22:32:37 +08:00
const WebMercatorHelper = require('cartodb-query-tables').utils.webMercatorHelper;
const webmercator = new WebMercatorHelper();
2018-10-05 01:50:14 +08:00
function optionsToParams (options) {
return {
sourceQuery: options.query,
2019-10-22 01:07:24 +08:00
res: 256 / options.resolution,
2018-10-05 01:50:14 +08:00
columns: options.columns,
dimensions: options.dimensions,
filters: options.filters,
placement: options.placement || DEFAULT_PLACEMENT,
isDefaultAggregation: options.isDefaultAggregation
2018-10-05 01:50:14 +08:00
};
}
/**
* Generates an aggregation query given the aggregation options:
* - query
* - resolution - defined as in torque:
* aggregation cell is resolution*resolution pixels, where tiles are always 256x256 pixels
* - columns
* - placement
* - dimensions
*
* The default aggregation (when no explicit placement, columns or dimensions are present) returns
* a sample record (with all the original columns and _cdb_feature_count) for each aggregation group.
* When placement, columns or dimensions are specified, columns are aggregated as requested
* (by default only _cdb_feature_count) and with the_geom_webmercator as defined by placement.
*/
2019-07-15 21:35:50 +08:00
const queryForOptions = (options) => aggregationQueryTemplate(optionsToParams(options));
module.exports = queryForOptions;
2018-10-05 01:50:14 +08:00
module.exports.infoForOptions = (options) => {
const params = optionsToParams(options);
const dimensions = {};
dimensionNamesAndExpressions(params).forEach(([dimensionName, info]) => {
dimensions[dimensionName] = {
sql: info.sql,
params: info.effectiveParams,
type: info.type
};
});
return dimensions;
};
const SUPPORTED_AGGREGATE_FUNCTIONS = {
2019-10-22 01:07:24 +08:00
count: {
2019-11-14 18:36:47 +08:00
sql: (columnName, params) => `count(${params.aggregated_column || '*'})`
},
2019-10-22 01:07:24 +08:00
avg: {
2019-11-14 18:36:47 +08:00
sql: (columnName, params) => `avg(${params.aggregated_column || columnName})`
},
2019-10-22 01:07:24 +08:00
sum: {
2019-11-14 18:36:47 +08:00
sql: (columnName, params) => `sum(${params.aggregated_column || columnName})`
},
2019-10-22 01:07:24 +08:00
min: {
2019-11-14 18:36:47 +08:00
sql: (columnName, params) => `min(${params.aggregated_column || columnName})`
},
2019-10-22 01:07:24 +08:00
max: {
2019-11-14 18:36:47 +08:00
sql: (columnName, params) => `max(${params.aggregated_column || columnName})`
2017-12-14 23:37:40 +08:00
},
2019-10-22 01:07:24 +08:00
mode: {
2019-11-14 18:36:47 +08:00
sql: (columnName, params) => `mode() WITHIN GROUP (ORDER BY ${params.aggregated_column || columnName})`
}
};
2017-12-19 03:06:16 +08:00
module.exports.SUPPORTED_AGGREGATE_FUNCTIONS = Object.keys(SUPPORTED_AGGREGATE_FUNCTIONS);
const sep = (list) => {
2019-10-22 01:07:24 +08:00
const expr = list.join(', ');
return expr ? ', ' + expr : expr;
};
const aggregateColumns = ctx => {
2017-12-14 23:37:15 +08:00
return Object.assign({
_cdb_feature_count: {
aggregate_function: 'count'
}
}, ctx.columns || {});
2017-12-14 23:37:15 +08:00
};
2019-11-14 18:36:47 +08:00
const aggregateExpression = (columnName, columnParameters) => {
const aggregateFunction = columnParameters.aggregate_function || 'count';
const aggregateDefinition = SUPPORTED_AGGREGATE_FUNCTIONS[aggregateFunction];
if (!aggregateDefinition) {
throw new Error("Invalid Aggregate function: '" + aggregateFunction + "'");
2018-03-22 00:01:32 +08:00
}
2019-11-14 18:36:47 +08:00
return aggregateDefinition.sql(columnName, columnParameters);
2018-03-22 00:01:32 +08:00
};
2017-12-14 23:37:15 +08:00
const aggregateColumnDefs = ctx => {
2019-10-22 01:07:24 +08:00
const columns = aggregateColumns(ctx);
2019-11-14 18:36:47 +08:00
return sep(Object.keys(columns).map(columnName => {
const aggregate = aggregateExpression(columnName, columns[columnName]);
return `${aggregate} AS ${columnName}`;
}));
};
const aggregateDimensions = ctx => ctx.dimensions || {};
2018-09-21 03:12:54 +08:00
const timeDimensionParameters = definition => {
// definition.column should correspond to a wrapped date column
2018-10-06 02:08:40 +08:00
const group = definition.group || {};
2018-09-21 03:12:54 +08:00
return {
time: `to_timestamp("${definition.column}")`,
2018-10-06 02:08:40 +08:00
timezone: group.timezone || 'utc',
units: group.units,
count: group.count || 1,
starting: group.starting,
2018-10-04 03:02:22 +08:00
format: definition.format
2018-09-21 03:12:54 +08:00
};
};
// Adapt old-style dimension definitions for backwards compatibility
const adaptDimensionDefinition = definition => {
2019-10-22 01:07:24 +08:00
if (typeof (definition) === 'string') {
return { column: definition };
}
return definition;
};
const dimensionExpression = definition => {
2018-10-06 02:08:40 +08:00
if (definition.group) {
// Currently only time dimensions are supported with parameters
2018-10-05 01:50:14 +08:00
return Object.assign({ type: 'timeDimension' }, timeDimension(timeDimensionParameters(definition)));
} else {
2018-10-05 01:50:14 +08:00
return { sql: `"${definition.column}"` };
2018-09-21 03:12:54 +08:00
}
};
2018-10-05 01:50:14 +08:00
const dimensionNamesAndExpressions = (ctx) => {
2019-10-22 01:07:24 +08:00
const dimensions = aggregateDimensions(ctx);
2018-10-05 01:50:14 +08:00
return Object.keys(dimensions).map(dimensionName => {
const dimension = adaptDimensionDefinition(dimensions[dimensionName]);
const expression = dimensionExpression(dimension);
return [dimensionName, expression];
});
};
const dimensionNames = (ctx, table) => {
2018-10-07 00:26:43 +08:00
return sep(dimensionNamesAndExpressions(ctx).map(([dimensionName]) => {
return table ? `${table}."${dimensionName}"` : `"${dimensionName}"`;
}));
2017-12-12 18:18:18 +08:00
};
const dimensionDefs = ctx => {
2018-10-07 00:26:43 +08:00
return sep(
dimensionNamesAndExpressions(ctx)
2019-10-22 01:07:24 +08:00
.map(([dimensionName, expression]) => `${expression.sql} AS "${dimensionName}"`)
2018-10-07 00:26:43 +08:00
);
};
2017-12-14 23:51:55 +08:00
2018-03-22 00:01:32 +08:00
const aggregateFilters = ctx => ctx.filters || {};
const filterConditionSQL = (expr, filter) => {
// TODO: validate filter parameters (e.g. cannot have both greater_than and greater_than or equal to)
if (filter) {
if (!Array.isArray(filter)) {
filter = [filter];
}
if (filter.length > 0) {
return filter.map(f => filterSingleConditionSQL(expr, f)).join(' OR ');
}
}
};
const filterSingleConditionSQL = (expr, filter) => {
let cond;
Object.keys(FILTERS).some(f => {
cond = FILTERS[f](expr, filter);
return cond;
});
return cond;
};
const sqlQ = (value) => {
if (isFinite(value)) {
return String(value);
}
return `'${value}'`; // TODO: escape single quotes! (by doubling them)
};
/* jshint eqeqeq: false */
/* x != null is used to check for both null and undefined; triple !== wouldn't do the trick */
const FILTERS = {
between: (expr, filter) => {
2019-10-22 01:07:24 +08:00
const lo = filter.greater_than_or_equal_to; const hi = filter.less_than_or_equal_to;
2018-03-22 00:01:32 +08:00
if (lo != null && hi != null) {
return `(${expr} BETWEEN ${sqlQ(lo)} AND ${sqlQ(hi)})`;
}
},
in: (expr, filter) => {
if (filter.in != null) {
return `(${expr} IN (${filter.in.map(v => sqlQ(v)).join(',')}))`;
}
},
notin: (expr, filter) => {
if (filter.not_in != null) {
return `(${expr} NOT IN (${filter.not_in.map(v => sqlQ(v)).join(',')}))`;
}
},
equal: (expr, filter) => {
if (filter.equal != null) {
return `(${expr} = ${sqlQ(filter.equal)})`;
}
},
not_equal: (expr, filter) => {
if (filter.not_equal != null) {
return `(${expr} <> ${sqlQ(filter.not_equal)})`;
}
},
range: (expr, filter) => {
2019-10-22 01:07:24 +08:00
const conds = [];
2018-03-22 00:01:32 +08:00
if (filter.greater_than_or_equal_to != null) {
conds.push(`(${expr} >= ${sqlQ(filter.greater_than_or_equal_to)})`);
}
if (filter.greater_than != null) {
conds.push(`(${expr} > ${sqlQ(filter.greater_than)})`);
}
if (filter.less_than_or_equal_to != null) {
conds.push(`(${expr} <= ${sqlQ(filter.less_than_or_equal_to)})`);
}
if (filter.less_than != null) {
conds.push(`(${expr} < ${sqlQ(filter.less_than)})`);
}
if (conds.length > 0) {
return conds.join(' AND ');
}
}
};
const filterConditions = ctx => {
2019-10-22 01:07:24 +08:00
const columns = aggregateColumns(ctx);
const dimensions = aggregateDimensions(ctx);
const filters = aggregateFilters(ctx);
2019-11-14 18:36:47 +08:00
return Object.keys(filters).map(filteredColumn => {
let filteredExpr;
if (columns[filteredColumn]) {
filteredExpr = aggregateExpression(filteredColumn, columns[filteredColumn]);
} else if (dimensions[filteredColumn]) {
filteredExpr = dimensions[filteredColumn];
2018-03-22 00:01:32 +08:00
}
2019-11-14 18:36:47 +08:00
if (!filteredExpr) {
throw new Error("Invalid filtered column: '" + filteredColumn + "'");
2018-03-22 00:01:32 +08:00
}
2019-11-14 18:36:47 +08:00
return filterConditionSQL(filteredExpr, filters[filteredColumn]);
2018-03-22 00:01:32 +08:00
}).join(' AND ');
};
const havingClause = ctx => {
2019-10-22 01:07:24 +08:00
const cond = filterConditions(ctx);
2018-03-22 00:01:32 +08:00
return cond ? `HAVING ${cond}` : '';
};
2017-12-14 23:51:55 +08:00
// SQL expression to compute the aggregation resolution (grid cell size).
// This is defined by the ctx.res parameter, which is the number of grid cells per tile linear dimension
// (i.e. each tile is divided into ctx.res*ctx.res cells).
// We limit the the minimum resolution to avoid division by zero problems. The limit used is
// the pixel size of zoom level 30 (i.e. 1/2*(30+8) of the full earth web-mercator extent), which is about 0.15 mm.
//
2019-07-15 21:54:31 +08:00
// NOTE: We'd rather use !pixel_width!, but in Mapnik this value is extent / 256 for raster
// and extent / tile_extent {4096 default} for MVT, so since aggregations are always based
// on 256 we can't have the same query in both cases
// As this scale change doesn't happen in !scale_denominator! we use that instead
2019-07-15 21:54:31 +08:00
// NOTE 2: The 0.00028 is used in Mapnik (and replicated in pg-mvt) and comes from
// OGC's Styled Layer Descriptor Implementation Specification
const gridResolution = ctx => {
2019-10-22 01:07:24 +08:00
const minimumResolution = webmercator.getResolution({ z: 38 });
return `${256 / ctx.res} * GREATEST(!scale_denominator! * 0.00028, ${minimumResolution})::double precision`;
};
2017-12-14 23:51:55 +08:00
2019-07-15 21:54:31 +08:00
// SQL query to extract the boundaries of the area to be aggregated and the grid resolution
2019-07-08 21:51:55 +08:00
// cdb_{x-y}{min_max} return the limits of the tile. Aggregations do [min, max) in both axis
// cdb_res: Aggregation resolution (as specified by gridResolution)
// cdb_point_bbox: Tile bounding box [min, max]
const gridInfoQuery = ctx => {
return `
SELECT
2019-07-08 21:51:55 +08:00
cdb_xmin,
cdb_ymin,
cdb_xmax,
cdb_ymax,
cdb_res,
ST_MakeEnvelope(cdb_xmin, cdb_ymin, cdb_xmax, cdb_ymax, 3857) AS cdb_point_bbox
FROM
(
SELECT
2019-07-08 21:51:55 +08:00
cdb_res,
CEIL (ST_XMIN(cdb_full_bbox) / cdb_res) * cdb_res AS cdb_xmin,
FLOOR(ST_XMAX(cdb_full_bbox) / cdb_res) * cdb_res AS cdb_xmax,
CEIL (ST_YMIN(cdb_full_bbox) / cdb_res) * cdb_res AS cdb_ymin,
FLOOR(ST_YMAX(cdb_full_bbox) / cdb_res) * cdb_res AS cdb_ymax
FROM
(
SELECT
${gridResolution(ctx)} AS cdb_res,
!bbox! cdb_full_bbox
) _cdb_input_resources
) _cdb_grid_bbox_margins
`;
};
2019-07-08 21:51:55 +08:00
// Function to generate the resulting point for a cell from the aggregated data
2019-07-09 00:52:35 +08:00
const aggregatedPointWebMercator = (ctx) => {
switch (ctx.placement) {
2019-10-22 01:07:24 +08:00
// For centroid, we return the average of the cell
case 'centroid':
return ', ST_SetSRID(ST_MakePoint(AVG(cdb_x), AVG(cdb_y)), 3857) AS the_geom_webmercator';
// Middle point of the cell
2019-10-22 01:07:24 +08:00
case 'point-grid':
return ', ST_SetSRID(ST_MakePoint(cdb_pos_grid_x, cdb_pos_grid_y), 3857) AS the_geom_webmercator';
2019-07-09 00:52:35 +08:00
// For point-sample we'll get a single point directly from the source
// If it's default aggregation we'll add the extra columns to keep backwards compatibility
2019-10-22 01:07:24 +08:00
case 'point-sample':
return '';
2019-07-09 00:52:35 +08:00
2019-10-22 01:07:24 +08:00
default:
throw new Error(`Invalid aggregation placement "${ctx.placement}"`);
2019-07-09 00:52:35 +08:00
}
};
// Function to generate the resulting point for a cell from the a join with the source
const aggregatedPointJoin = (ctx) => {
switch (ctx.placement) {
2019-10-22 01:07:24 +08:00
case 'centroid':
return '';
case 'point-grid':
return '';
// For point-sample we'll get a single point directly from the source
// If it's default aggregation we'll add the extra columns to keep backwards compatibility
2019-10-22 01:07:24 +08:00
case 'point-sample':
return `
NATURAL JOIN
(
SELECT ${ctx.isDefaultAggregation ? '*' : 'cartodb_id, the_geom_webmercator'}
FROM
(
${ctx.sourceQuery}
) __cdb_src_query
) __cdb_query_columns
`;
2019-10-22 01:07:24 +08:00
default:
throw new Error(`Invalid aggregation placement "${ctx.placement}"`);
}
};
2019-07-08 21:51:55 +08:00
// Function to generate the values common to all points in a cell
// By default we use the cell number (which is fast), but for point-grid we
// get the coordinates of the mid point so we don't need to calculate them later
// which requires extra data in the group by clause
const aggregatedPosCoordinate = (ctx, coordinate) => {
2019-07-09 00:52:35 +08:00
switch (ctx.placement) {
2019-10-22 01:07:24 +08:00
// For point-grid we return the coordinate of the middle point of the grid
case 'point-grid':
return `(FLOOR(cdb_${coordinate} / __cdb_grid_params.cdb_res) + 0.5) * __cdb_grid_params.cdb_res`;
2019-07-08 21:51:55 +08:00
// For other, we return the cell position (relative to the world)
2019-10-22 01:07:24 +08:00
default:
return `FLOOR(cdb_${coordinate} / __cdb_grid_params.cdb_res)`;
2019-07-08 21:51:55 +08:00
}
};
2019-07-15 21:35:50 +08:00
const aggregationQueryTemplate = ctx => `
2019-07-08 21:51:55 +08:00
WITH __cdb_grid_params AS
(
${gridInfoQuery(ctx)}
)
SELECT * FROM
(
SELECT
min(cartodb_id) as cartodb_id
2019-07-09 00:52:35 +08:00
${aggregatedPointWebMercator(ctx)}
${dimensionDefs(ctx)}
${aggregateColumnDefs(ctx)}
FROM
(
SELECT
2019-07-08 21:51:55 +08:00
*,
${aggregatedPosCoordinate(ctx, 'x')} as cdb_pos_grid_x,
${aggregatedPosCoordinate(ctx, 'y')} as cdb_pos_grid_y
FROM
(
SELECT
2019-07-08 21:51:55 +08:00
__cdb_src_query.*,
ST_X(the_geom_webmercator) cdb_x,
ST_Y(the_geom_webmercator) cdb_y
FROM
(
2019-07-08 21:51:55 +08:00
${ctx.sourceQuery}
) __cdb_src_query, __cdb_grid_params
WHERE the_geom_webmercator && cdb_point_bbox
OFFSET 0
) __cdb_src_get_x_y, __cdb_grid_params
WHERE cdb_x < __cdb_grid_params.cdb_xmax AND cdb_y < __cdb_grid_params.cdb_ymax
2019-07-09 00:32:04 +08:00
) __cdb_src_gridded
2019-07-08 21:51:55 +08:00
GROUP BY cdb_pos_grid_x, cdb_pos_grid_y ${dimensionNames(ctx)}
${havingClause(ctx)}
) __cdb_aggregation_src
2019-07-09 00:52:35 +08:00
${aggregatedPointJoin(ctx)}
`;
2019-07-15 21:35:50 +08:00
module.exports.SUPPORTED_PLACEMENTS = ['centroid', 'point-grid', 'point-sample'];
module.exports.GEOMETRY_COLUMN = 'the_geom_webmercator';
const clusterFeaturesQuery = ctx => `
WITH
_cdb_params AS (
SELECT
${gridResolution(ctx)} AS res
),
_cell AS (
SELECT
ST_MakeEnvelope(
Floor(ST_X(_cdb_query.the_geom_webmercator)/_cdb_params.res)*_cdb_params.res,
Floor(ST_Y(_cdb_query.the_geom_webmercator)/_cdb_params.res)*_cdb_params.res,
Floor(ST_X(_cdb_query.the_geom_webmercator)/_cdb_params.res + 1)*_cdb_params.res,
Floor(ST_Y(_cdb_query.the_geom_webmercator)/_cdb_params.res + 1)*_cdb_params.res,
3857
) AS bbox
FROM (${ctx.sourceQuery}) _cdb_query, _cdb_params
WHERE _cdb_query.cartodb_id = ${ctx.id}
)
SELECT _cdb_query.* FROM _cell, (${ctx.sourceQuery}) _cdb_query
WHERE ST_Intersects(_cdb_query.the_geom_webmercator, _cell.bbox)
`;
module.exports.featuresQuery = (id, options) => clusterFeaturesQuery({
id,
sourceQuery: options.query,
2019-10-22 01:07:24 +08:00
res: 256 / options.resolution
});