Aggregation: Improve speeeeeeed

This commit is contained in:
Raul Marin 2019-07-08 15:51:55 +02:00
parent 8454eef6e9
commit 262f957218

View File

@ -310,26 +310,39 @@ const gridResolution = ctx => {
}; };
// SQL query to extract the boundaries of the area to be aggregated and the grid resolution // SQL query to extract the boundaries of the area to be aggregated and the grid resolution
// cdb_{x-y}{min_max} return the limits of the tile. Aggregations do [min, max) in both axis
// cdb_res: Aggregation resolution (as specified by gridResolution)
// cdb_point_bbox: Tile bounding box [min, max]
const gridInfoQuery = ctx => { const gridInfoQuery = ctx => {
return ` return `
SELECT SELECT
res, cdb_xmin,
CEIL (ST_XMIN(cdb_full_bbox) / res) * res AS cdb_xmin, cdb_ymin,
FLOOR(ST_XMAX(cdb_full_bbox) / res) * res AS cdb_xmax, cdb_xmax,
CEIL (ST_YMIN(cdb_full_bbox) / res) * res AS cdb_ymin, cdb_ymax,
FLOOR(ST_YMAX(cdb_full_bbox) / res) * res AS cdb_ymax cdb_res,
ST_MakeEnvelope(cdb_xmin, cdb_ymin, cdb_xmax, cdb_ymax, 3857) AS cdb_point_bbox
FROM FROM
( (
SELECT SELECT
${gridResolution(ctx)} AS res, cdb_res,
CEIL (ST_XMIN(cdb_full_bbox) / cdb_res) * cdb_res AS cdb_xmin,
FLOOR(ST_XMAX(cdb_full_bbox) / cdb_res) * cdb_res AS cdb_xmax,
CEIL (ST_YMIN(cdb_full_bbox) / cdb_res) * cdb_res AS cdb_ymin,
FLOOR(ST_YMAX(cdb_full_bbox) / cdb_res) * cdb_res AS cdb_ymax
FROM
(
SELECT
${gridResolution(ctx)} AS cdb_res,
!bbox! cdb_full_bbox !bbox! cdb_full_bbox
OFFSET 0
) _cdb_input_resources ) _cdb_input_resources
) _cdb_grid_bbox_margins
`; `;
}; };
// Function to generate the resulting point for a cell from the aggregated // Function to generate the resulting point for a cell from the aggregated data
// Point sample joins the query with itself to get the data from the lowest id
const aggregatedPoint = (ctx, aggregated) => { const aggregatedPoint = (ctx, aggregated) => {
const placement = ctx.placement || DEFAULT_PLACEMENT; const placement = ctx.placement || DEFAULT_PLACEMENT;
switch (placement) { switch (placement) {
@ -337,15 +350,13 @@ const aggregatedPoint = (ctx, aggregated) => {
// For centroid, we return the average of the cell // For centroid, we return the average of the cell
case `centroid`: case `centroid`:
return aggregated ? return aggregated ?
`, ST_SetSRID(ST_MakePoint(AVG(ST_X(the_geom_webmercator)), AVG(ST_Y(the_geom_webmercator))), 3857) AS the_geom_webmercator` : `, ST_SetSRID(ST_MakePoint(AVG(cdb_x), AVG(cdb_y)), 3857) AS the_geom_webmercator` :
``; ``;
// Middle point of the cell // Middle point of the cell
case `point-grid`: case `point-grid`:
return aggregated ? return aggregated ?
`, ST_SetSRID(ST_MakePoint(cdb_xmin + (cdb_pos_grid_x + 0.5) * res, `, ST_SetSRID(ST_MakePoint(cdb_pos_grid_x, cdb_pos_grid_y), 3857) AS the_geom_webmercator`:
cdb_ymin + (cdb_pos_grid_y + 0.5) * res),
3857) AS the_geom_webmercator`:
``; ``;
// For point-sample we'll get a single point directly from the source // For point-sample we'll get a single point directly from the source
@ -368,19 +379,29 @@ const aggregatedPoint = (ctx, aggregated) => {
} }
}; };
// Notes: // Function to generate the values common to all points in a cell
// * We need to filter spatially using !bbox! to make the queries efficient because // By default we use the cell number (which is fast), but for point-grid we
// the filter added by Mapnik (wrapping the query) // get the coordinates of the mid point so we don't need to calculate them later
// is only applied after the aggregation. // which requires extra data in the group by clause
// * This queries are used for rendering and the_geom is omitted in the results for better performance const aggregatedPosCoordinate = (ctx, coordinate) => {
// * If the MVT extent or tile buffer was 0 or a multiple of the resolution we could use directly const placement = ctx.placement || DEFAULT_PLACEMENT;
// the bbox for them, but in general we need to find the nearest cell limits inside the bbox. switch (placement) {
// * bbox coordinates can have an error in the last digits; we apply a small correction before // For point-grid we return the coordinate of the middle point of the grid
// applying CEIL or FLOOR to compensate for this, so that coordinates closer than a small (`eps`) case `point-grid`:
// fraction of the cell size to a cell limit are moved to the exact limit. return `(FLOOR(cdb_${coordinate} / __cdb_grid_params.cdb_res) + 0.5) * __cdb_grid_params.cdb_res`;
// For other, we return the cell position (relative to the world)
default:
return `FLOOR(cdb_${coordinate} / __cdb_grid_params.cdb_res)`;
}
};
const defaultAggregationQueryTemplate = ctx => ` const defaultAggregationQueryTemplate = ctx => `
WITH __cdb_grid_params AS
(
${gridInfoQuery(ctx)}
)
SELECT * FROM SELECT * FROM
( (
SELECT SELECT
@ -389,31 +410,27 @@ SELECT * FROM
${dimensionDefs(ctx)} ${dimensionDefs(ctx)}
${aggregateColumnDefs(ctx)} ${aggregateColumnDefs(ctx)}
FROM FROM
(
SELECT
*,
${aggregatedPosCoordinate(ctx, 'x')} as cdb_pos_grid_x,
${aggregatedPosCoordinate(ctx, 'y')} as cdb_pos_grid_y
FROM
( (
SELECT SELECT
__cdb_src_query.*, __cdb_src_query.*,
__cdb_src_params.*, ST_X(the_geom_webmercator) cdb_x,
FLOOR((ST_X(the_geom_webmercator) - __cdb_src_params.cdb_xmin) / __cdb_src_params.res) as cdb_pos_grid_x, ST_Y(the_geom_webmercator) cdb_y
FLOOR((ST_Y(the_geom_webmercator) - __cdb_src_params.cdb_ymin) / __cdb_src_params.res) as cdb_pos_grid_y
FROM FROM
( (
${ctx.sourceQuery} ${ctx.sourceQuery}
) __cdb_src_query, ) __cdb_src_query, __cdb_grid_params
( WHERE the_geom_webmercator && cdb_point_bbox
SELECT OFFSET 0
_cdb_grid_bbox_margins.*, ) __cdb_src_get_x_y, __cdb_grid_params
ST_MakeEnvelope(cdb_xmin, cdb_ymin, cdb_xmax, cdb_ymax, 3857) AS cdb_point_bbox, WHERE cdb_x < __cdb_grid_params.cdb_xmax AND cdb_y < __cdb_grid_params.cdb_ymax
((cdb_xmax - cdb_xmin) / res)::int as cdb_limit_x, ) __cdb_src_gridded, __cdb_grid_params
((cdb_ymax - cdb_ymin) / res)::int as cdb_limit_y GROUP BY cdb_pos_grid_x, cdb_pos_grid_y ${dimensionNames(ctx)}
FROM
(
${gridInfoQuery(ctx)}
) _cdb_grid_bbox_margins OFFSET 0
) __cdb_src_params
WHERE the_geom_webmercator && cdb_point_bbox OFFSET 0
) __cdb_srd_grid
WHERE cdb_pos_grid_x < cdb_limit_x AND cdb_pos_grid_y < cdb_limit_y
GROUP BY cdb_pos_grid_x, cdb_pos_grid_y, __cdb_srd_grid.cdb_xmin, __cdb_srd_grid.cdb_ymin, __cdb_srd_grid.res ${dimensionNames(ctx)}
${havingClause(ctx)} ${havingClause(ctx)}
) __cdb_aggregation_src ) __cdb_aggregation_src
${aggregatedPoint(ctx, false)} ${aggregatedPoint(ctx, false)}