Aggregation: Improve speeeeeeed

This commit is contained in:
Raul Marin 2019-07-08 15:51:55 +02:00
parent 8454eef6e9
commit 262f957218

View File

@ -310,26 +310,39 @@ const gridResolution = ctx => {
};
// SQL query to extract the boundaries of the area to be aggregated and the grid resolution
// cdb_{x-y}{min_max} returns the limits of the tile. Aggregations do [min, max) in both axes
// cdb_res: Aggregation resolution (as specified by gridResolution)
// cdb_point_bbox: Tile bounding box [min, max]
// Returns the SQL text (a single template literal) that computes the grid parameters.
// `ctx` is not read here directly; it is only forwarded to gridResolution(ctx), whose
// result is interpolated into the query as the resolution column.
// NOTE(review): this span looks like a rendered diff in which removed and added lines
// are interleaved (both the `res` and `cdb_res` aliases appear, and gridResolution(ctx)
// is interpolated twice) -- confirm against the real file before relying on the exact SQL.
// Nothing can be annotated inside the template literal without changing the emitted SQL.
const gridInfoQuery = ctx => {
return `
SELECT
res,
CEIL (ST_XMIN(cdb_full_bbox) / res) * res AS cdb_xmin,
FLOOR(ST_XMAX(cdb_full_bbox) / res) * res AS cdb_xmax,
CEIL (ST_YMIN(cdb_full_bbox) / res) * res AS cdb_ymin,
FLOOR(ST_YMAX(cdb_full_bbox) / res) * res AS cdb_ymax
cdb_xmin,
cdb_ymin,
cdb_xmax,
cdb_ymax,
cdb_res,
ST_MakeEnvelope(cdb_xmin, cdb_ymin, cdb_xmax, cdb_ymax, 3857) AS cdb_point_bbox
FROM
(
SELECT
${gridResolution(ctx)} AS res,
cdb_res,
CEIL (ST_XMIN(cdb_full_bbox) / cdb_res) * cdb_res AS cdb_xmin,
FLOOR(ST_XMAX(cdb_full_bbox) / cdb_res) * cdb_res AS cdb_xmax,
CEIL (ST_YMIN(cdb_full_bbox) / cdb_res) * cdb_res AS cdb_ymin,
FLOOR(ST_YMAX(cdb_full_bbox) / cdb_res) * cdb_res AS cdb_ymax
FROM
(
SELECT
${gridResolution(ctx)} AS cdb_res,
!bbox! cdb_full_bbox
OFFSET 0
) _cdb_input_resources
) _cdb_grid_bbox_margins
`;
};
// Function to generate the resulting point for a cell from the aggregated
// Function to generate the resulting point for a cell from the aggregated data
// Point sample joins the query with itself to get the data from the lowest id
const aggregatedPoint = (ctx, aggregated) => {
const placement = ctx.placement || DEFAULT_PLACEMENT;
switch (placement) {
@ -337,15 +350,13 @@ const aggregatedPoint = (ctx, aggregated) => {
// For centroid, we return the average of the cell
case `centroid`:
return aggregated ?
`, ST_SetSRID(ST_MakePoint(AVG(ST_X(the_geom_webmercator)), AVG(ST_Y(the_geom_webmercator))), 3857) AS the_geom_webmercator` :
`, ST_SetSRID(ST_MakePoint(AVG(cdb_x), AVG(cdb_y)), 3857) AS the_geom_webmercator` :
``;
// Middle point of the cell
case `point-grid`:
return aggregated ?
`, ST_SetSRID(ST_MakePoint(cdb_xmin + (cdb_pos_grid_x + 0.5) * res,
cdb_ymin + (cdb_pos_grid_y + 0.5) * res),
3857) AS the_geom_webmercator`:
`, ST_SetSRID(ST_MakePoint(cdb_pos_grid_x, cdb_pos_grid_y), 3857) AS the_geom_webmercator`:
``;
// For point-sample we'll get a single point directly from the source
@ -368,19 +379,29 @@ const aggregatedPoint = (ctx, aggregated) => {
}
};
// Notes:
// * We need to filter spatially using !bbox! to make the queries efficient because
// the filter added by Mapnik (wrapping the query)
// is only applied after the aggregation.
// * These queries are used for rendering and the_geom is omitted in the results for better performance
// * If the MVT extent or tile buffer was 0 or a multiple of the resolution we could use directly
// the bbox for them, but in general we need to find the nearest cell limits inside the bbox.
// * bbox coordinates can have an error in the last digits; we apply a small correction before
// applying CEIL or FLOOR to compensate for this, so that coordinates closer than a small (`eps`)
// fraction of the cell size to a cell limit are moved to the exact limit.
// Builds the SQL expression for the value shared by every point that falls in the same cell.
// The cheap default is the cell number along the given axis. For the point-grid placement
// the expression is the coordinate of the middle of the cell instead, so it does not have
// to be computed later (which would need extra columns in the GROUP BY clause).
//   ctx        - aggregation context; only `ctx.placement` is read here
//   coordinate - axis suffix interpolated into the `cdb_<coordinate>` column name
// Returns the SQL fragment as a string.
const aggregatedPosCoordinate = (ctx, coordinate) => {
    // Fall back to the default placement only when the context does not set one.
    const mode = ctx.placement || DEFAULT_PLACEMENT;

    // Cell position (relative to the world) along the requested axis.
    const cellIndex = `FLOOR(cdb_${coordinate} / __cdb_grid_params.cdb_res)`;

    if (mode === `point-grid`) {
        // Shift by half a cell and scale back to get the mid point of the cell.
        return `(${cellIndex} + 0.5) * __cdb_grid_params.cdb_res`;
    }

    return cellIndex;
};
const defaultAggregationQueryTemplate = ctx => `
WITH __cdb_grid_params AS
(
${gridInfoQuery(ctx)}
)
SELECT * FROM
(
SELECT
@ -389,31 +410,27 @@ SELECT * FROM
${dimensionDefs(ctx)}
${aggregateColumnDefs(ctx)}
FROM
(
SELECT
*,
${aggregatedPosCoordinate(ctx, 'x')} as cdb_pos_grid_x,
${aggregatedPosCoordinate(ctx, 'y')} as cdb_pos_grid_y
FROM
(
SELECT
__cdb_src_query.*,
__cdb_src_params.*,
FLOOR((ST_X(the_geom_webmercator) - __cdb_src_params.cdb_xmin) / __cdb_src_params.res) as cdb_pos_grid_x,
FLOOR((ST_Y(the_geom_webmercator) - __cdb_src_params.cdb_ymin) / __cdb_src_params.res) as cdb_pos_grid_y
ST_X(the_geom_webmercator) cdb_x,
ST_Y(the_geom_webmercator) cdb_y
FROM
(
${ctx.sourceQuery}
) __cdb_src_query,
(
SELECT
_cdb_grid_bbox_margins.*,
ST_MakeEnvelope(cdb_xmin, cdb_ymin, cdb_xmax, cdb_ymax, 3857) AS cdb_point_bbox,
((cdb_xmax - cdb_xmin) / res)::int as cdb_limit_x,
((cdb_ymax - cdb_ymin) / res)::int as cdb_limit_y
FROM
(
${gridInfoQuery(ctx)}
) _cdb_grid_bbox_margins OFFSET 0
) __cdb_src_params
WHERE the_geom_webmercator && cdb_point_bbox OFFSET 0
) __cdb_srd_grid
WHERE cdb_pos_grid_x < cdb_limit_x AND cdb_pos_grid_y < cdb_limit_y
GROUP BY cdb_pos_grid_x, cdb_pos_grid_y, __cdb_srd_grid.cdb_xmin, __cdb_srd_grid.cdb_ymin, __cdb_srd_grid.res ${dimensionNames(ctx)}
) __cdb_src_query, __cdb_grid_params
WHERE the_geom_webmercator && cdb_point_bbox
OFFSET 0
) __cdb_src_get_x_y, __cdb_grid_params
WHERE cdb_x < __cdb_grid_params.cdb_xmax AND cdb_y < __cdb_grid_params.cdb_ymax
) __cdb_src_gridded, __cdb_grid_params
GROUP BY cdb_pos_grid_x, cdb_pos_grid_y ${dimensionNames(ctx)}
${havingClause(ctx)}
) __cdb_aggregation_src
${aggregatedPoint(ctx, false)}