Merge pull request #1002 from CartoDB/aggregation-fixes

Aggregation fixes
This commit is contained in:
Javier Goizueta 2018-07-18 11:54:13 +02:00 committed by GitHub
commit fcfa763890
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 396 additions and 41 deletions

View File

@ -50,6 +50,8 @@ Bug Fixes:
- Fix `meta.stats.estimatedFeatureCount` for aggregations and queries with tokens
- Fix numeric histogram bounds when `start` and `end` are specified (#991)
- Static maps filters correctly if `layer` option is passed in the url.
- Aggregation doesn't return out-of-tile, partially aggregated clusters
- Aggregation was not accurate for high zoom, far away from the origin tiles
Announcements:
* Improve error message when the DB query is over the user's limits

View File

@ -237,38 +237,75 @@ const havingClause = ctx => {
};
// SQL expression to compute the aggregation resolution (grid cell size).
// This is equivalent to `${256/ctx.res}*CDB_XYZ_Resolution(CDB_ZoomFromScale(!scale_denominator!))`
// This is defined by the ctx.res parameter, which is the number of grid cells per tile linear dimension
// (i.e. each tile is divided into ctx.res*ctx.res cells).
// We limit the the minimum resolution to avoid division by zero problems. The limit used is
// the pixel size of zoom level 30 (i.e. 1/2*(30+8) of the full earth web-mercator extent), which is about 0.15 mm.
// Computing this using !scale_denominator!, !pixel_width! or !pixel_height! produces
// inaccurate results due to rounding present in those values.
const gridResolution = ctx => {
const minimumResolution = 2*Math.PI*6378137/Math.pow(2,38);
return `GREATEST(${256*0.00028/ctx.res}*!scale_denominator!, ${minimumResolution})::double precision`;
const pixelSize = 'CDB_XYZ_Resolution(CDB_ZoomFromScale(!scale_denominator!))';
return `GREATEST(${256/ctx.res}*${pixelSize}, ${minimumResolution})::double precision`;
};
// Each aggregation cell is defined by the cell coordinates Floor(x/res), Floor(y/res),
// i.e. they include the West and South borders but not the East and North ones.
// So, to avoid picking points that don't belong to cells in the tile, given the tile
// limits Xmin, Ymin, Xmax, Ymax (bbox), we should select points that satisfy
// Xmin <= x < Xmax and Ymin <= y < Ymax (with x, y from the_geom_webmercator)
// On the other hand we can efficiently filter spatially (relying on spatial indexing)
// with `the_geom_webmercator && bbox` which is equivalent to
// Xmin <= x <= Xmax and Ymin <= y <= Ymax
// So, in order to be both efficient and accurate we will need to use both
// conditions for spatial filtering.
const spatialFilter = `
(_cdb_query.the_geom_webmercator && _cdb_params.bbox) AND
ST_X(_cdb_query.the_geom_webmercator) >= _cdb_params.xmin AND
ST_X(_cdb_query.the_geom_webmercator) < _cdb_params.xmax AND
ST_Y(_cdb_query.the_geom_webmercator) >= _cdb_params.ymin AND
ST_Y(_cdb_query.the_geom_webmercator) < _cdb_params.ymax
`;
// Notes:
// * We need to filter spatially using !bbox! to make the queries efficient because
// the filter added by Mapnik (wrapping the query)
// is only applied after the aggregation.
// * This queries are used for rendering and the_geom is omitted in the results for better performance
// * If the MVT extent or tile buffer was 0 or a multiple of the resolution we could use directly
// the bbox for them, but in general we need to find the nearest cell limits inside the bbox.
// * bbox coordinates can have an error in the last digites; we apply a small correction before
// applying CEIL or FLOOR to compensate for this.
const sqlParams = (ctx) => `
_cdb_res AS (
SELECT
${gridResolution(ctx)} AS res,
!bbox! AS bbox,
(2*2.220446049250313e-16::double precision) AS eps
),
_cdb_params AS (
SELECT
res,
bbox,
CEIL((ST_XMIN(bbox) - eps*ABS(ST_XMIN(bbox)))/res)*res AS xmin,
FLOOR((ST_XMAX(bbox) + eps*ABS(ST_XMAX(bbox)))/res)*res AS xmax,
CEIL((ST_YMIN(bbox) - eps*ABS(ST_YMIN(bbox)))/res)*res AS ymin,
FLOOR((ST_YMAX(bbox) + eps*ABS(ST_YMAX(bbox)))/res)*res AS ymax
FROM _cdb_res
)
`;
// The special default aggregation includes all the columns of a sample row per grid cell and
// the count (_cdb_feature_count) of the aggregated rows.
const defaultAggregationQueryTemplate = ctx => `
WITH
_cdb_params AS (
SELECT
${gridResolution(ctx)} AS res,
!bbox! AS bbox
),
WITH ${sqlParams(ctx)},
_cdb_clusters AS (
SELECT
MIN(cartodb_id) AS cartodb_id
${dimensionDefs(ctx)}
${aggregateColumnDefs(ctx)}
FROM (${ctx.sourceQuery}) _cdb_query, _cdb_params
WHERE _cdb_query.the_geom_webmercator && _cdb_params.bbox
WHERE ${spatialFilter}
GROUP BY
Floor(ST_X(_cdb_query.the_geom_webmercator)/_cdb_params.res),
Floor(ST_Y(_cdb_query.the_geom_webmercator)/_cdb_params.res)
@ -283,12 +320,7 @@ const defaultAggregationQueryTemplate = ctx => `
const aggregationQueryTemplates = {
'centroid': ctx => `
WITH
_cdb_params AS (
SELECT
${gridResolution(ctx)} AS res,
!bbox! AS bbox
)
WITH ${sqlParams(ctx)}
SELECT
MIN(_cdb_query.cartodb_id) AS cartodb_id,
ST_SetSRID(
@ -300,7 +332,7 @@ const aggregationQueryTemplates = {
${dimensionDefs(ctx)}
${aggregateColumnDefs(ctx)}
FROM (${ctx.sourceQuery}) _cdb_query, _cdb_params
WHERE _cdb_query.the_geom_webmercator && _cdb_params.bbox
WHERE ${spatialFilter}
GROUP BY
Floor(ST_X(_cdb_query.the_geom_webmercator)/_cdb_params.res),
Floor(ST_Y(_cdb_query.the_geom_webmercator)/_cdb_params.res)
@ -309,12 +341,7 @@ const aggregationQueryTemplates = {
`,
'point-grid': ctx => `
WITH
_cdb_params AS (
SELECT
${gridResolution(ctx)} AS res,
!bbox! AS bbox
),
WITH ${sqlParams(ctx)},
_cdb_clusters AS (
SELECT
MIN(_cdb_query.cartodb_id) AS cartodb_id,
@ -323,7 +350,7 @@ const aggregationQueryTemplates = {
${dimensionDefs(ctx)}
${aggregateColumnDefs(ctx)}
FROM (${ctx.sourceQuery}) _cdb_query, _cdb_params
WHERE the_geom_webmercator && _cdb_params.bbox
WHERE ${spatialFilter}
GROUP BY _cdb_gx, _cdb_gy ${dimensionNames(ctx)}
${havingClause(ctx)}
)
@ -336,19 +363,14 @@ const aggregationQueryTemplates = {
`,
'point-sample': ctx => `
WITH
_cdb_params AS (
SELECT
${gridResolution(ctx)} AS res,
!bbox! AS bbox
),
WITH ${sqlParams(ctx)},
_cdb_clusters AS (
SELECT
MIN(cartodb_id) AS cartodb_id
${dimensionDefs(ctx)}
${aggregateColumnDefs(ctx)}
FROM (${ctx.sourceQuery}) _cdb_query, _cdb_params
WHERE _cdb_query.the_geom_webmercator && _cdb_params.bbox
WHERE ${spatialFilter}
GROUP BY
Floor(ST_X(_cdb_query.the_geom_webmercator)/_cdb_params.res),
Floor(ST_Y(_cdb_query.the_geom_webmercator)/_cdb_params.res)

View File

@ -134,6 +134,100 @@ describe('aggregation', function () {
from generate_series(0, 13) x
`;
// Some points at corners and centers of aggregation grids (zoom=1, resolution=1)
// @ = point location
// --+---+---+---+---+---+---+--
// | | | | | | |
// --+---+---@---@---@---+---+--
// | | | @ | @ | | |
// --+---+---@---@---@---+---+--- Y = 0
// | | | @ | @ | | |
// --+---+---@---@---@---+---+--
// | | | | | | |
// --+---+---+---+---+---+---+--
// |
// X = 0
// Point identifiers (cartodb_id)
// --+---+---+---+---+---+---+--
// | | | | | | |
// --+---+---7---8---9---+---+--
// | | |12 |13 | | |
// --+---+---4---5---6---+---+---
// | | |10 |11 | | |
// --+---+---1---2---3---+---+--
// | | | | | | |
// --+---+---+---+---+---+---+--
// Point count per aggregation cell and Z=1 tiles
//
// Tile 0,0 -+---+---+---+- Tile 1,0
// | | | 1 | 1 | 1 | |
// --+---+---@---@---@---+---+--
// | | | 2 | 2 | 1 | |
// --+---+---@---@---@---+---+---
// | | | 2 | 2 | 1 | |
// --+---+---@---@---@---+---+--
// | | | | | | |
// Tile 0, 1 -+---+---+---+- Tile 1,1
//
const POINTS_SQL_GRID = `
WITH params AS (
SELECT CDB_XYZ_Resolution(1) AS l -- cell size for Z=1 res=1
)
SELECT
row_number() OVER () AS cartodb_id,
ST_SetSRID(ST_MakePoint(x*l, y*l), 3857) AS the_geom_webmercator,
ST_Transform(ST_SetSRID(ST_MakePoint(x*l, y*l), 3857), 4326) AS the_geom
FROM params, generate_series(-1,1) x, generate_series(-1,1) y
UNION ALL
SELECT
row_number() OVER () + 9 AS cartodb_id,
ST_SetSRID(ST_MakePoint(x*l-l/2, y*l-l/2), 3857) AS the_geom_webmercator,
ST_Transform(ST_SetSRID(ST_MakePoint(x*l-l/2, y*l-l/2), 3857), 4326) AS the_geom
FROM params, generate_series(0,1) x, generate_series(0,1) y
`;
// Points positioned inside one cell of Z=20, X=1000000, X=1000000 (the SW corner)
// The center of the cell is x = 18181005.874444414, y = -18181043.94366749
const POINTS_SQL_CELL = `
SELECT
1 AS cartodb_id,
ST_SetSRID(ST_MakePoint(18181005.8, -18181043.9), 3857) AS the_geom_webmercator,
ST_Transform(ST_SetSRID(ST_MakePoint(18181005.8, -18181043.9), 3857), 4326) AS the_geom
UNION ALL SELECT
2 AS cartodb_id,
ST_SetSRID(ST_MakePoint(18181005.9, -18181044.0), 3857) AS the_geom_webmercator,
ST_Transform(ST_SetSRID(ST_MakePoint(18181005.9, -18181044.0), 3857), 4326) AS the_geom
UNION ALL SELECT
3 AS cartodb_id,
ST_SetSRID(ST_MakePoint(18181005.87, -18181043.94), 3857) AS the_geom_webmercator,
ST_Transform(ST_SetSRID(ST_MakePoint(18181005.87, -18181043.94), 3857), 4326) AS the_geom
UNION ALL SELECT
4 AS cartodb_id,
ST_SetSRID(ST_MakePoint(18181005.8, -18181043.9), 3857) AS the_geom_webmercator,
ST_Transform(ST_SetSRID(ST_MakePoint(18181005.8, -18181043.9), 3857), 4326) AS the_geom
`;
// Points positioned inside one cell of Z=20, X=1000000, X=1000000 (inner cell not on border)
// The center of the cell is x = 18181006.023735486, y = -18181043.794376418
const POINTS_SQL_CELL_INNER = `
SELECT
1 AS cartodb_id,
ST_SetSRID(ST_MakePoint(18181005.95, -18181043.8), 3857) AS the_geom_webmercator,
ST_Transform(ST_SetSRID(ST_MakePoint(18181005.95, -18181043.8), 3857), 4326) AS the_geom
UNION ALL SELECT
2 AS cartodb_id,
ST_SetSRID(ST_MakePoint(18181006.09, -18181043.72), 3857) AS the_geom_webmercator,
ST_Transform(ST_SetSRID(ST_MakePoint(18181006.09, -18181043.72), 3857), 4326) AS the_geom
UNION ALL SELECT
3 AS cartodb_id,
ST_SetSRID(ST_MakePoint(18181006.02, -18181043.79), 3857) AS the_geom_webmercator,
ST_Transform(ST_SetSRID(ST_MakePoint(18181006.02, -18181043.79), 3857), 4326) AS the_geom
UNION ALL SELECT
4 AS cartodb_id,
ST_SetSRID(ST_MakePoint(18181006.01, -18181043.75), 3857) AS the_geom_webmercator,
ST_Transform(ST_SetSRID(ST_MakePoint(18181006.01, -18181043.75), 3857), 4326) AS the_geom
`;
function createVectorMapConfig (layers = [
{
type: 'cartodb',
@ -1151,7 +1245,6 @@ describe('aggregation', function () {
type: 'cartodb',
options: {
sql: POINTS_SQL_1,
resolution: 256,
aggregation: {
threshold: 1
}
@ -2162,9 +2255,9 @@ describe('aggregation', function () {
options: {
sql: POINTS_SQL_0,
resolution: 1,
aggregation: {
threshold: 1
threshold: 1,
resolution: 1
}
}
}
@ -2280,6 +2373,244 @@ describe('aggregation', function () {
});
});
it(`for ${placement} each aggr. cell is in a single tile`, function (done) {
this.mapConfig = {
version: '1.6.0',
buffersize: { 'mvt': 0 },
layers: [
{
type: 'cartodb',
options: {
sql: POINTS_SQL_GRID,
aggregation: {
threshold: 1,
resolution: 1
}
}
}
]
};
if (placement !== 'default') {
this.mapConfig.layers[0].options.aggregation.placement = placement;
}
this.testClient = new TestClient(this.mapConfig);
this.testClient.getTile(1, 0, 0, { format: 'mvt' }, (err, res, mvt) => {
if (err) {
return done(err);
}
const tile00 = JSON.parse(mvt.toGeoJSONSync(0));
this.testClient.getTile(1, 0, 1, { format: 'mvt' }, (err, res, mvt) => {
if (err) {
return done(err);
}
const tile01 = JSON.parse(mvt.toGeoJSONSync(0));
this.testClient.getTile(1, 1, 0, { format: 'mvt' }, (err, res, mvt) => {
if (err) {
return done(err);
}
const tile10 = JSON.parse(mvt.toGeoJSONSync(0));
this.testClient.getTile(1, 1, 1, { format: 'mvt' }, (err, res, mvt) => {
if (err) {
return done(err);
}
const tile11 = JSON.parse(mvt.toGeoJSONSync(0));
const tile00Expected = [
{ cartodb_id: 4, _cdb_feature_count: 2 },
{ cartodb_id: 7, _cdb_feature_count: 1 }
];
const tile10Expected = [
{ cartodb_id: 5, _cdb_feature_count: 2 },
{ cartodb_id: 6, _cdb_feature_count: 1 },
{ cartodb_id: 8, _cdb_feature_count: 1 },
{ cartodb_id: 9, _cdb_feature_count: 1 }
];
const tile01Expected = [
{ cartodb_id: 1, _cdb_feature_count: 2 }
];
const tile11Expected = [
{ cartodb_id: 2, _cdb_feature_count: 2 },
{ cartodb_id: 3, _cdb_feature_count: 1 }
];
const tile00Actual = tile00.features.map(f => f.properties);
const tile10Actual = tile10.features.map(f => f.properties);
const tile01Actual = tile01.features.map(f => f.properties);
const tile11Actual = tile11.features.map(f => f.properties);
const orderById = (a, b) => a.cartodb_id - b.cartodb_id;
assert.deepEqual(tile00Actual.sort(orderById), tile00Expected);
assert.deepEqual(tile10Actual.sort(orderById), tile10Expected);
assert.deepEqual(tile01Actual.sort(orderById), tile01Expected);
assert.deepEqual(tile11Actual.sort(orderById), tile11Expected);
done();
});
});
});
});
});
it(`for ${placement} no partially aggregated cells`, function (done) {
// Use level 1 with resolution 2 tiles and buffersize 1 (half the cell size)
// Only the cells completely inside the buffer are aggregated
this.mapConfig = {
version: '1.6.0',
buffersize: { 'mvt': 1 },
layers: [
{
type: 'cartodb',
options: {
sql: POINTS_SQL_GRID,
aggregation: {
threshold: 1,
resolution: 2
}
}
}
]
};
if (placement !== 'default') {
this.mapConfig.layers[0].options.aggregation.placement = placement;
}
this.testClient = new TestClient(this.mapConfig);
this.testClient.getTile(1, 0, 0, { format: 'mvt' }, (err, res, mvt) => {
if (err) {
return done(err);
}
const tile00 = JSON.parse(mvt.toGeoJSONSync(0));
this.testClient.getTile(1, 0, 1, { format: 'mvt' }, (err, res, mvt) => {
if (err) {
return done(err);
}
const tile01 = JSON.parse(mvt.toGeoJSONSync(0));
this.testClient.getTile(1, 1, 0, { format: 'mvt' }, (err, res, mvt) => {
if (err) {
return done(err);
}
const tile10 = JSON.parse(mvt.toGeoJSONSync(0));
this.testClient.getTile(1, 1, 1, { format: 'mvt' }, (err, res, mvt) => {
if (err) {
return done(err);
}
const tile11 = JSON.parse(mvt.toGeoJSONSync(0));
const tile00Expected = [
{ cartodb_id: 4, _cdb_feature_count: 3 }
];
const tile10Expected = [
{ cartodb_id: 5, _cdb_feature_count: 5 }
];
const tile01Expected = [
{ cartodb_id: 1, _cdb_feature_count: 2 }
];
const tile11Expected = [
{ cartodb_id: 2, _cdb_feature_count: 3 }
];
const tile00Actual = tile00.features.map(f => f.properties);
const tile10Actual = tile10.features.map(f => f.properties);
const tile01Actual = tile01.features.map(f => f.properties);
const tile11Actual = tile11.features.map(f => f.properties);
const orderById = (a, b) => a.cartodb_id - b.cartodb_id;
assert.deepEqual(tile00Actual.sort(orderById), tile00Expected);
assert.deepEqual(tile10Actual.sort(orderById), tile10Expected);
assert.deepEqual(tile01Actual.sort(orderById), tile01Expected);
assert.deepEqual(tile11Actual.sort(orderById), tile11Expected);
done();
});
});
});
});
});
it(`for ${placement} points aggregated into corner cluster`, function (done) {
// this test will fail due to !bbox! lack of accuracy if strict cell filtering is in place
this.mapConfig = {
version: '1.6.0',
buffersize: { 'mvt': 0 },
layers: [
{
type: 'cartodb',
options: {
sql: POINTS_SQL_CELL,
aggregation: {
threshold: 1,
resolution: 1
}
}
}
]
};
if (placement !== 'default') {
this.mapConfig.layers[0].options.aggregation.placement = placement;
}
this.testClient = new TestClient(this.mapConfig);
this.testClient.getTile(20, 1000000, 1000000, { format: 'mvt' }, (err, res, mvt) => {
if (err) {
return done(err);
}
const tile = JSON.parse(mvt.toGeoJSONSync(0));
assert.equal(tile.features.length, 1);
assert.equal(tile.features[0].properties._cdb_feature_count, 4);
if (placement === 'point-grid') {
// check geometry x = 18181005.874444414, y = -18181043.94366749
const expectedPoint = [ 163.322754576802, -83.3823797469878 ];
assert.deepEqual(tile.features[0].geometry.coordinates, expectedPoint);
}
done();
});
});
it(`for ${placement} points aggregated into correct cluster`, function (done) {
this.mapConfig = {
version: '1.6.0',
buffersize: { 'mvt': 0 },
layers: [
{
type: 'cartodb',
options: {
sql: POINTS_SQL_CELL_INNER,
aggregation: {
threshold: 1,
resolution: 1
}
}
}
]
};
if (placement !== 'default') {
this.mapConfig.layers[0].options.aggregation.placement = placement;
}
this.testClient = new TestClient(this.mapConfig);
this.testClient.getTile(20, 1000000, 1000000, { format: 'mvt' }, (err, res, mvt) => {
if (err) {
return done(err);
}
const tile = JSON.parse(mvt.toGeoJSONSync(0));
assert.equal(tile.features.length, 1);
assert.equal(tile.features[0].properties._cdb_feature_count, 4);
if (placement === 'point-grid') {
// check geometry x = 18181006.023735486, y = -18181043.794376418
const expectedPoint = [ 163.322755917907, -83.3823795924354 ];
assert.deepEqual(tile.features[0].geometry.coordinates, expectedPoint);
}
done();
});
});
});
['default', 'centroid', 'point-sample', 'point-grid'].forEach(placement => {
@ -2294,9 +2625,9 @@ describe('aggregation', function () {
options: {
sql: POINTS_SQL_PAIRS,
resolution: 1,
aggregation: {
threshold: 1
threshold: 1,
resolution: 1
}
}
}
@ -2332,9 +2663,9 @@ describe('aggregation', function () {
options: {
sql: POINTS_SQL_PAIRS,
resolution: 1,
aggregation: {
threshold: 1
threshold: 1,
resolution: 1
},
metadata: {
aggrFeatureCount: 10
@ -2373,9 +2704,9 @@ describe('aggregation', function () {
options: {
sql: POINTS_SQL_PAIRS,
resolution: 1,
aggregation: {
threshold: 1
threshold: 1,
resolution: 1
},
metadata: {
aggrFeatureCount: 0
@ -2414,9 +2745,9 @@ describe('aggregation', function () {
options: {
sql: POINTS_SQL_PAIRS,
resolution: 1,
aggregation: {
threshold: 1
threshold: 1,
resolution: 1
},
metadata: {
featureCount: true