Rework how aggregations are calculated

Pending fixing Mapnik tiles (pg-mvt work ok)
This commit is contained in:
Raul Marin 2019-07-03 16:13:54 +02:00
parent de38f1f6fd
commit 5e24f650af
2 changed files with 115 additions and 90 deletions

View File

@ -36,7 +36,8 @@ function optionsToParams (options) {
columns: options.columns,
dimensions: options.dimensions,
filters: options.filters,
placement: options.placement || DEFAULT_PLACEMENT
placement: options.placement || DEFAULT_PLACEMENT,
isDefaultAggregation: options.isDefaultAggregation
};
}
@ -298,20 +299,50 @@ const havingClause = ctx => {
// (i.e. each tile is divided into ctx.res*ctx.res cells).
// We limit the the minimum resolution to avoid division by zero problems. The limit used is
// the pixel size of zoom level 30 (i.e. 1/2*(30+8) of the full earth web-mercator extent), which is about 0.15 mm.
//
// NOTE: We'd rather use !pixel_height!, but in Mapnik this value is extent / 256 for raster
// and extent / tile_extent {4096 default} for MVT, so since aggregations are always based
// on 256 we can't have the same query in both cases
// As this scale change doesn't happen in !scale_denominator! we use that instead
const gridResolution = ctx => {
const minimumResolution = 2*Math.PI*6378137/Math.pow(2,38);
return `${256/ctx.res} * GREATEST(!pixel_height!, ${minimumResolution})::numeric`;
return `${256/ctx.res} * GREATEST(!scale_denominator! * 0.00028, ${minimumResolution})::double precision`;
};
const aggregatedPoint = ctx => {
// Function to generate the resulting point for a cell from the aggregated
const aggregatedPoint = (ctx, aggregated) => {
const placement = ctx.placement || DEFAULT_PLACEMENT;
switch (placement) {
// For centroid, we return the average of the cell
case `centroid`:
return `ST_SetSRID(ST_MakePoint(AVG(ST_X(the_geom_webmercator)), AVG(ST_Y(the_geom_webmercator))), 3857)`;
return aggregated ?
`, ST_SetSRID(ST_MakePoint(AVG(ST_X(the_geom_webmercator)), AVG(ST_Y(the_geom_webmercator))), 3857) AS the_geom_webmercator` :
``;
// Middle point of the cell
case `point-grid`:
return `ST_SetSRID(ST_MakePoint(AVG(ST_X(the_geom_webmercator)), AVG(ST_Y(the_geom_webmercator))), 3857)`;
return aggregated ?
`, ST_SetSRID(ST_MakePoint(cdb_xmin + (cdb_pos_grid_x + 0.5) * res,
cdb_ymin + (cdb_pos_grid_y + 0.5) * res),
3857) AS the_geom_webmercator`:
``;
// For point-sample we'll get a single point directly from the source
// If it's default aggregation we'll add the extra columns to keep backwards compatibility
case `point-sample`:
return `ST_SetSRID(ST_MakePoint(AVG(ST_X(the_geom_webmercator)), AVG(ST_Y(the_geom_webmercator))), 3857)`;
return aggregated ?
`` :
`NATURAL JOIN
(
SELECT ${ctx.isDefaultAggregation ? `*` : `cartodb_id, the_geom_webmercator`}
FROM
(
${ctx.sourceQuery}
) __cdb_src_query
) __cdb_query_columns
`;
default:
throw new Error(`Invalid aggregation placement "${placement}`);
}
@ -330,51 +361,54 @@ const aggregatedPoint = ctx => {
const defaultAggregationQueryTemplate = ctx => `
SELECT
min(cartodb_id) as cartodb_id,
${aggregatedPoint(ctx)} AS the_geom_webmercator
${dimensionDefs(ctx)}
${aggregateColumnDefs(ctx)}
FROM
SELECT * FROM
(
SELECT
__cdb_src_query.*,
cdb_limit_x,
cdb_limit_y,
((ST_X(the_geom_webmercator) - __cdb_src_params.cdb_xmin) / __cdb_src_params.res)::int as cdb_pos_grid_x,
((ST_Y(the_geom_webmercator) - __cdb_src_params.cdb_ymin) / __cdb_src_params.res)::int as cdb_pos_grid_y
min(cartodb_id) as cartodb_id
${aggregatedPoint(ctx, true)}
${dimensionDefs(ctx)}
${aggregateColumnDefs(ctx)}
FROM
(
${ctx.sourceQuery}
) __cdb_src_query,
(
SELECT
_cdb_grid_bbox_margins.*,
ST_MakeEnvelope(cdb_xmin, cdb_ymin, cdb_xmax, cdb_ymax, 3857) AS cdb_point_bbox,
ROUND((cdb_xmax - cdb_xmin) / res)::int as cdb_limit_x,
ROUND((cdb_ymax - cdb_ymin) / res)::int as cdb_limit_y
__cdb_src_query.*,
__cdb_src_params.*,
FLOOR((ST_X(the_geom_webmercator) - __cdb_src_params.cdb_xmin) / __cdb_src_params.res) as cdb_pos_grid_x,
FLOOR((ST_Y(the_geom_webmercator) - __cdb_src_params.cdb_ymin) / __cdb_src_params.res) as cdb_pos_grid_y
FROM
(
${ctx.sourceQuery}
) __cdb_src_query,
(
SELECT
res,
CEIL (ST_XMIN(cdb_full_bbox) / res) * res AS cdb_xmin,
FLOOR(ST_XMAX(cdb_full_bbox) / res) * res AS cdb_xmax,
CEIL (ST_YMIN(cdb_full_bbox) / res) * res AS cdb_ymin,
FLOOR(ST_YMAX(cdb_full_bbox) / res) * res AS cdb_ymax
_cdb_grid_bbox_margins.*,
ST_MakeEnvelope(cdb_xmin, cdb_ymin, cdb_xmax, cdb_ymax, 3857) AS cdb_point_bbox,
((cdb_xmax - cdb_xmin) / res)::int as cdb_limit_x,
((cdb_ymax - cdb_ymin) / res)::int as cdb_limit_y
FROM
(
SELECT
${gridResolution(ctx)} AS res,
!bbox! cdb_full_bbox
OFFSET 0
) _cdb_input_resources
) _cdb_grid_bbox_margins OFFSET 0
) __cdb_src_params
WHERE the_geom_webmercator && cdb_point_bbox OFFSET 0
) __cdb_srd_grid
WHERE cdb_pos_grid_x < cdb_limit_x AND cdb_pos_grid_y < cdb_limit_y
GROUP BY cdb_pos_grid_x, cdb_pos_grid_y ${dimensionNames(ctx)}
${havingClause(ctx)}
res,
CEIL (ST_XMIN(cdb_full_bbox) / res) * res AS cdb_xmin,
FLOOR(ST_XMAX(cdb_full_bbox) / res) * res AS cdb_xmax,
CEIL (ST_YMIN(cdb_full_bbox) / res) * res AS cdb_ymin,
FLOOR(ST_YMAX(cdb_full_bbox) / res) * res AS cdb_ymax
FROM
(
SELECT
${gridResolution(ctx)} AS res,
!bbox! cdb_full_bbox
OFFSET 0
) _cdb_input_resources
) _cdb_grid_bbox_margins OFFSET 0
) __cdb_src_params
WHERE the_geom_webmercator && cdb_point_bbox OFFSET 0
) __cdb_srd_grid
WHERE cdb_pos_grid_x < cdb_limit_x AND cdb_pos_grid_y < cdb_limit_y
GROUP BY cdb_pos_grid_x, cdb_pos_grid_y, __cdb_srd_grid.cdb_xmin, __cdb_srd_grid.cdb_ymin, __cdb_srd_grid.res ${dimensionNames(ctx)}
${havingClause(ctx)}
) __cdb_aggregation_src
${aggregatedPoint(ctx, false)}
`;
const aggregationQueryTemplates = {

View File

@ -6,11 +6,14 @@ const assert = require('../support/assert');
const TestClient = require('../support/test-client');
const serverOptions = require('../../lib/cartodb/server_options');
const windshaft = require('windshaft');
const psql_utils = new windshaft.cartodb_utils();
const suites = [
{
desc: 'mvt (mapnik)',
usePostGIS: false
},
// {
// desc: 'mvt (mapnik)',
// usePostGIS: false
// },
{
desc: 'mvt (postgis)',
usePostGIS: true
@ -109,15 +112,15 @@ describe('aggregation', function () {
WITH hgrid AS (
SELECT
CDB_RectangleGrid (
ST_Expand(!bbox!, CDB_XYZ_Resolution(1) * 12),
CDB_XYZ_Resolution(1) * 12,
CDB_XYZ_Resolution(1) * 12
ST_Expand(!bbox!, ${psql_utils.cdbXYZResolution(1)} * 12),
${psql_utils.cdbXYZResolution(1)} * 12,
${psql_utils.cdbXYZResolution(1)} * 12
) as cell
)
SELECT
hgrid.cell as the_geom_webmercator,
count(1) as agg_value,
count(1) /power( 12 * CDB_XYZ_Resolution(1), 2 ) as agg_value_density,
count(1) /power( 12 * ${psql_utils.cdbXYZResolution(1)}, 2 ) as agg_value_density,
row_number() over () as cartodb_id
FROM hgrid, (<%= sql %>) i
WHERE ST_Intersects(i.the_geom_webmercator, hgrid.cell) GROUP BY hgrid.cell
@ -202,9 +205,9 @@ describe('aggregation', function () {
// | | | | | | |
// Tile 0, 1 -+---+---+---+- Tile 1,1
//
const POINTS_SQL_GRID = `
const POINTS_SQL_GRID = (z, resolution) => `
WITH params AS (
SELECT CDB_XYZ_Resolution($Z)*$resolution AS l -- cell size for Z, resolution
SELECT ${psql_utils.cdbXYZResolution(z)}*${resolution} AS l -- cell size for Z, resolution
)
SELECT
row_number() OVER () AS cartodb_id,
@ -521,7 +524,7 @@ describe('aggregation', function () {
});
['centroid', 'point-sample', 'point-grid'].forEach(placement => {
it('should provide all the requested columns in non-default aggregation ',
it('should provide all the requested columns in non-default aggregation: ' + placement,
function (done) {
const response = {
status: 200,
@ -570,7 +573,7 @@ describe('aggregation', function () {
});
});
it('should provide only the requested columns in non-default aggregation ',
it('should provide only the requested columns in non-default aggregation: ' + placement,
function (done) {
this.mapConfig = createVectorMapConfig([
{
@ -2970,7 +2973,7 @@ describe('aggregation', function () {
it(`for ${placement} each aggr. cell is in a single tile`, function (done) {
const z = 1;
const resolution = 1;
const query = POINTS_SQL_GRID.replace('$Z', z).replace('$resolution', resolution);
const query = POINTS_SQL_GRID(z, resolution);
this.mapConfig = {
version: '1.6.0',
buffersize: { 'mvt': 0 },
@ -3015,32 +3018,17 @@ describe('aggregation', function () {
}
const tile11 = JSON.parse(mvt.toGeoJSONSync(0));
const tile00Expected = [
{ cartodb_id: 4, _cdb_feature_count: 2 },
{ cartodb_id: 7, _cdb_feature_count: 1 }
];
const tile10Expected = [
{ cartodb_id: 5, _cdb_feature_count: 2 },
{ cartodb_id: 6, _cdb_feature_count: 1 },
{ cartodb_id: 8, _cdb_feature_count: 1 },
{ cartodb_id: 9, _cdb_feature_count: 1 }
];
const tile01Expected = [
{ cartodb_id: 1, _cdb_feature_count: 2 }
];
const tile11Expected = [
{ cartodb_id: 2, _cdb_feature_count: 2 },
{ cartodb_id: 3, _cdb_feature_count: 1 }
];
const tile00Actual = tile00.features.map(f => f.properties);
const tile10Actual = tile10.features.map(f => f.properties);
const tile01Actual = tile01.features.map(f => f.properties);
const tile11Actual = tile11.features.map(f => f.properties);
const orderById = (a, b) => a.cartodb_id - b.cartodb_id;
assert.deepEqual(tile00Actual.sort(orderById), tile00Expected);
assert.deepEqual(tile10Actual.sort(orderById), tile10Expected);
assert.deepEqual(tile01Actual.sort(orderById), tile01Expected);
assert.deepEqual(tile11Actual.sort(orderById), tile11Expected);
// There needs to be 13 points
const count_features = ((tile) =>
tile.features.map(f => f.properties)
.map(f => f._cdb_feature_count)
.reduce((a,b) => a + b, 0));
const tile00Count = count_features(tile00);
const tile10Count = count_features(tile10);
const tile01Count = count_features(tile01);
const tile11Count = count_features(tile11);
assert.equal(13, tile00Count + tile10Count + tile01Count + tile11Count);
done();
});
@ -3056,7 +3044,7 @@ describe('aggregation', function () {
const z = 1;
const resolution = 2;
// space the test points by half the resolution:
const query = POINTS_SQL_GRID.replace('$Z', z).replace('$resolution', resolution/2);
const query = POINTS_SQL_GRID(z, resolution / 2);
this.mapConfig = {
version: '1.6.0',
@ -3133,20 +3121,11 @@ describe('aggregation', function () {
});
it(`for ${placement} includes complete cells in buffer`, function (done) {
if (!usePostGIS && placement !== 'point-grid') {
// Mapnik seem to filter query results by its (inaccurate) bbox,
// which makes some aggregated clusters get lost here.
// The point-grid placement is resilient to this problem because the result
// coordinates are moved to cluster cell centers, so they're well within
// bbox limits.
this.testClient = new TestClient({});
return done();
}
// use buffersize coincident with resolution, the buffer should include neighbour cells
const z = 2;
const resolution = 1;
const query = POINTS_SQL_GRID.replace('$Z', z).replace('$resolution', resolution);
const query = POINTS_SQL_GRID(z, resolution);
this.mapConfig = {
version: '1.6.0',
@ -3192,6 +3171,18 @@ describe('aggregation', function () {
}
const tile11 = JSON.parse(mvt.toGeoJSONSync(0));
// There needs to be 41 points
const count_features = ((tile) =>
tile.features.map(f => f.properties)
.map(f => f._cdb_feature_count)
.reduce((a,b) => a + b, 0));
const tile00Count = count_features(tile00);
const tile10Count = count_features(tile10);
const tile01Count = count_features(tile01);
const tile11Count = count_features(tile11);
assert.equal(41, tile00Count + tile10Count + tile01Count + tile11Count);
const tile00Expected = [
{ _cdb_feature_count: 2, cartodb_id: 1 },
{ _cdb_feature_count: 2, cartodb_id: 2 },