WIP to change how aggregations are calculated

This commit is contained in:
Raul Marin 2019-06-28 13:50:26 +02:00
parent cd8624ae2d
commit a3e8f45552
2 changed files with 65 additions and 128 deletions

View File

@ -157,8 +157,7 @@ const clusterFeaturesQuery = ctx => `
// Builds the SQL expression for the size of one aggregation grid cell.
// Reads ctx.res (and, in the first return, ctx.zoom) and interpolates them into SQL text.
// NOTE(review): there are two consecutive `return` statements below, so the second one is
// unreachable as written. This looks like the removed and the added line of a diff shown
// together; confirm which expression is the intended one.
const gridResolution = ctx => {
// Lower bound for the resolution: 2*PI*6378137 divided by 2^38.
const minimumResolution = 2*Math.PI*6378137/Math.pow(2,38);
// Only used by the first return statement.
const pixelSize = `CDB_XYZ_Resolution(${ctx.zoom})`;
return `GREATEST(${256/ctx.res}*${pixelSize}, ${minimumResolution})::double precision`;
return `${256/ctx.res} * GREATEST(!pixel_height!, ${minimumResolution})::numeric`;
};
const aggregationQuery = ctx => `

View File

@ -35,7 +35,8 @@ function optionsToParams (options) {
res: 256/options.resolution,
columns: options.columns,
dimensions: options.dimensions,
filters: options.filters
filters: options.filters,
placement: options.placement || DEFAULT_PLACEMENT
};
}
@ -297,31 +298,24 @@ const havingClause = ctx => {
// (i.e. each tile is divided into ctx.res*ctx.res cells).
// We limit the minimum resolution to avoid division by zero problems. The limit used is
// the pixel size of zoom level 30 (i.e. 1/2^(30+8) of the full earth web-mercator extent), which is about 0.15 mm.
// Computing this using !scale_denominator!, !pixel_width! or !pixel_height! produces
// inaccurate results due to rounding present in those values.
// Builds the SQL expression for the size of one aggregation grid cell from ctx.res.
// NOTE(review): there are two consecutive `return` statements below, so the second one is
// unreachable as written. This looks like the removed and the added line of a diff shown
// together; confirm which expression is the intended one.
const gridResolution = ctx => {
// Lower bound for the resolution: 2*PI*6378137 divided by 2^38.
const minimumResolution = 2*Math.PI*6378137/Math.pow(2,38);
// Only used by the first return statement.
const pixelSize = 'CDB_XYZ_Resolution(CDB_ZoomFromScale(!scale_denominator!))';
return `GREATEST(${256/ctx.res}*${pixelSize}, ${minimumResolution})::double precision`;
return `${256/ctx.res} * GREATEST(!pixel_height!, ${minimumResolution})::numeric`;
};
// Each aggregation cell is defined by the cell coordinates Floor(x/res), Floor(y/res),
// i.e. they include the West and South borders but not the East and North ones.
// So, to avoid picking points that don't belong to cells in the tile, given the tile
// limits Xmin, Ymin, Xmax, Ymax (bbox), we should select points that satisfy
// Xmin <= x < Xmax and Ymin <= y < Ymax (with x, y from the_geom_webmercator)
// On the other hand we can efficiently filter spatially (relying on spatial indexing)
// with `the_geom_webmercator && bbox` which is equivalent to
// Xmin <= x <= Xmax and Ymin <= y <= Ymax
// So, in order to be both efficient and accurate we will need to use both
// conditions for spatial filtering.
// SQL predicate combining the bbox overlap test (`&&`) with half-open bounds on the
// point coordinates: xmin <= x < xmax and ymin <= y < ymax.
// Assembled line by line; the leading and trailing empty entries keep the
// surrounding newlines of the fragment.
const spatialFilter = [
    '',
    '(_cdb_query.the_geom_webmercator && _cdb_params.bbox) AND',
    'ST_X(_cdb_query.the_geom_webmercator) >= _cdb_params.xmin AND',
    'ST_X(_cdb_query.the_geom_webmercator) < _cdb_params.xmax AND',
    'ST_Y(_cdb_query.the_geom_webmercator) >= _cdb_params.ymin AND',
    'ST_Y(_cdb_query.the_geom_webmercator) < _cdb_params.ymax',
    ''
].join('\n');
/**
 * Builds the SQL expression that yields the geometry of an aggregated cell.
 *
 * @param {Object} ctx - query context; `ctx.placement` selects the placement and
 *   falls back to DEFAULT_PLACEMENT when it is falsy.
 * @returns {string} SQL expression producing a point with SRID 3857.
 * @throws {Error} when the placement is not one of the known names.
 */
const aggregatedPoint = ctx => {
    // Point built from the average X and average Y of the aggregated rows.
    const averagedPoint =
        'ST_SetSRID(ST_MakePoint(AVG(ST_X(the_geom_webmercator)), AVG(ST_Y(the_geom_webmercator))), 3857)';
    const placement = ctx.placement || DEFAULT_PLACEMENT;
    switch (placement) {
    // NOTE(review): all three placements currently yield the same averaged point;
    // presumably point-grid and point-sample are still to be differentiated. Confirm.
    case 'centroid':
    case 'point-grid':
    case 'point-sample':
        return averagedPoint;
    default:
        // The closing quote was missing from this message.
        throw new Error(`Invalid aggregation placement "${placement}"`);
    }
};
// Notes:
// * We need to filter spatially using !bbox! to make the queries efficient because
@ -333,116 +327,60 @@ const spatialFilter = `
// * bbox coordinates can have an error in the last digits; we apply a small correction before
// applying CEIL or FLOOR to compensate for this, so that coordinates closer than a small (`eps`)
// fraction of the cell size to a cell limit are moved to the exact limit.
// Returns the text of two CTE definitions:
//  * _cdb_res: the grid resolution (from gridResolution(ctx)), the !bbox! token and a small eps.
//  * _cdb_params: res, bbox and the bbox limits snapped to multiples of res
//    (CEIL for the minima, FLOOR for the maxima, each shifted by eps*res first).
const sqlParams = (ctx) => {
    const resolutionExpr = gridResolution(ctx);
    const fragmentLines = [
        '',
        '_cdb_res AS (',
        'SELECT',
        `${resolutionExpr} AS res,`,
        '!bbox! AS bbox,',
        '(1E-6::double precision) AS eps',
        '),',
        '_cdb_params AS (',
        'SELECT',
        'res,',
        'bbox,',
        'CEIL((ST_XMIN(bbox) - eps*res)/res)*res AS xmin,',
        'FLOOR((ST_XMAX(bbox) + eps*res)/res)*res AS xmax,',
        'CEIL((ST_YMIN(bbox) - eps*res)/res)*res AS ymin,',
        'FLOOR((ST_YMAX(bbox) + eps*res)/res)*res AS ymax',
        'FROM _cdb_res',
        ')',
        ''
    ];
    return fragmentLines.join('\n');
};
// The special default aggregation includes all the columns of a sample row per grid cell and
// the count (_cdb_feature_count) of the aggregated rows.
// Returns the SQL text of the aggregation query for the given context, interpolating
// ctx.sourceQuery and the fragments produced by sqlParams, dimensionDefs,
// aggregateColumnDefs, dimensionNames, aggregateColumnNames, aggregatedPoint,
// gridResolution, havingClause and the spatialFilter constant.
// NOTE(review): the template body as shown contains a `WITH ... _cdb_clusters` query and,
// interleaved with it, a nested-subquery query (__cdb_src_query / __cdb_src_params).
// These look like the removed and the added lines of a diff shown together; the SQL is
// not coherent as written. Confirm against the actual file before relying on it.
const defaultAggregationQueryTemplate = ctx => `
WITH ${sqlParams(ctx)},
_cdb_clusters AS (
SELECT
MIN(cartodb_id) AS cartodb_id
${dimensionDefs(ctx)}
${aggregateColumnDefs(ctx)}
FROM (${ctx.sourceQuery}) _cdb_query, _cdb_params
WHERE ${spatialFilter}
GROUP BY
Floor(ST_X(_cdb_query.the_geom_webmercator)/_cdb_params.res),
Floor(ST_Y(_cdb_query.the_geom_webmercator)/_cdb_params.res)
${dimensionNames(ctx)}
) SELECT
_cdb_query.*
${aggregateColumnNames(ctx)}
SELECT
min(cartodb_id) as cartodb_id,
${aggregatedPoint(ctx)} AS the_geom_webmercator
${dimensionDefs(ctx)}
${aggregateColumnDefs(ctx)}
FROM
(
SELECT
__cdb_src_query.*,
cdb_limit_x,
cdb_limit_y,
((ST_X(the_geom_webmercator) - __cdb_src_params.cdb_xmin) / __cdb_src_params.res)::int as cdb_pos_grid_x,
((ST_Y(the_geom_webmercator) - __cdb_src_params.cdb_ymin) / __cdb_src_params.res)::int as cdb_pos_grid_y
FROM
_cdb_clusters INNER JOIN (${ctx.sourceQuery}) _cdb_query
ON (_cdb_clusters.cartodb_id = _cdb_query.cartodb_id)
(
${ctx.sourceQuery}
) __cdb_src_query,
(
SELECT
_cdb_grid_bbox_margins.*,
ST_MakeEnvelope(cdb_xmin, cdb_ymin, cdb_xmax, cdb_ymax, 3857) AS cdb_point_bbox,
ROUND((cdb_xmax - cdb_xmin) / res)::int as cdb_limit_x,
ROUND((cdb_ymax - cdb_ymin) / res)::int as cdb_limit_y
FROM
(
SELECT
res,
CEIL (ST_XMIN(cdb_full_bbox) / res) * res AS cdb_xmin,
FLOOR(ST_XMAX(cdb_full_bbox) / res) * res AS cdb_xmax,
CEIL (ST_YMIN(cdb_full_bbox) / res) * res AS cdb_ymin,
FLOOR(ST_YMAX(cdb_full_bbox) / res) * res AS cdb_ymax
FROM
(
SELECT
${gridResolution(ctx)} AS res,
!bbox! cdb_full_bbox
OFFSET 0
) _cdb_input_resources
) _cdb_grid_bbox_margins OFFSET 0
) __cdb_src_params
WHERE the_geom_webmercator && cdb_point_bbox OFFSET 0
) __cdb_srd_grid
WHERE cdb_pos_grid_x < cdb_limit_x AND cdb_pos_grid_y < cdb_limit_y
GROUP BY cdb_pos_grid_x, cdb_pos_grid_y ${dimensionNames(ctx)}
${havingClause(ctx)}
`;
// Maps an aggregation placement name ('centroid', 'point-grid', 'point-sample') to the
// function that builds the SQL text for that placement from a context object.
// NOTE(review): as shown, each key appears twice: first with an inline template, then
// (last three entries) pointing at defaultAggregationQueryTemplate, with no comma between
// the two groups. These look like the removed and the added lines of a diff shown
// together; confirm which set of entries is the intended one.
const aggregationQueryTemplates = {
'centroid': ctx => `
WITH ${sqlParams(ctx)}
SELECT
MIN(_cdb_query.cartodb_id) AS cartodb_id,
ST_SetSRID(
ST_MakePoint(
AVG(ST_X(_cdb_query.the_geom_webmercator)),
AVG(ST_Y(_cdb_query.the_geom_webmercator))
), 3857
) AS the_geom_webmercator
${dimensionDefs(ctx)}
${aggregateColumnDefs(ctx)}
FROM (${ctx.sourceQuery}) _cdb_query, _cdb_params
WHERE ${spatialFilter}
GROUP BY
Floor(ST_X(_cdb_query.the_geom_webmercator)/_cdb_params.res),
Floor(ST_Y(_cdb_query.the_geom_webmercator)/_cdb_params.res)
${dimensionNames(ctx)}
${havingClause(ctx)}
`,
'point-grid': ctx => `
WITH ${sqlParams(ctx)},
_cdb_clusters AS (
SELECT
MIN(_cdb_query.cartodb_id) AS cartodb_id,
Floor(ST_X(_cdb_query.the_geom_webmercator)/_cdb_params.res)::int AS _cdb_gx,
Floor(ST_Y(_cdb_query.the_geom_webmercator)/_cdb_params.res)::int AS _cdb_gy
${dimensionDefs(ctx)}
${aggregateColumnDefs(ctx)}
FROM (${ctx.sourceQuery}) _cdb_query, _cdb_params
WHERE ${spatialFilter}
GROUP BY _cdb_gx, _cdb_gy ${dimensionNames(ctx)}
${havingClause(ctx)}
)
SELECT
_cdb_clusters.cartodb_id AS cartodb_id,
ST_SetSRID(ST_MakePoint((_cdb_gx+0.5)*res, (_cdb_gy+0.5)*res), 3857) AS the_geom_webmercator
${dimensionNames(ctx)}
${aggregateColumnNames(ctx)}
FROM _cdb_clusters, _cdb_params
`,
'point-sample': ctx => `
WITH ${sqlParams(ctx)},
_cdb_clusters AS (
SELECT
MIN(cartodb_id) AS cartodb_id
${dimensionDefs(ctx)}
${aggregateColumnDefs(ctx)}
FROM (${ctx.sourceQuery}) _cdb_query, _cdb_params
WHERE ${spatialFilter}
GROUP BY
Floor(ST_X(_cdb_query.the_geom_webmercator)/_cdb_params.res),
Floor(ST_Y(_cdb_query.the_geom_webmercator)/_cdb_params.res)
${dimensionNames(ctx)}
${havingClause(ctx)}
)
SELECT
_cdb_clusters.cartodb_id,
the_geom_webmercator
${dimensionNames(ctx, '_cdb_clusters')}
${aggregateColumnNames(ctx, '_cdb_clusters')}
FROM
_cdb_clusters INNER JOIN (${ctx.sourceQuery}) _cdb_query
ON (_cdb_clusters.cartodb_id = _cdb_query.cartodb_id)
`
'centroid': defaultAggregationQueryTemplate,
'point-grid': defaultAggregationQueryTemplate,
'point-sample': defaultAggregationQueryTemplate
};