// Windshaft-cartodb/lib/backends/layer-stats/mapnik-layer-stats.js (JavaScript, 333 lines, 12 KiB)

'use strict';
const queryUtils = require('../../utils/query-utils');
2018-05-08 01:03:19 +08:00
const AggregationMapConfig = require('../../models/aggregation/aggregation-mapconfig');
2018-10-05 01:50:14 +08:00
const aggregationQuery = require('../../models/aggregation/aggregation-query');
/**
 * Stats backend handling the 'mapnik' and 'cartodb' layer types.
 */
function MapnikLayerStats () {
    // Layer types this backend declares itself able to handle.
    this._types = {
        mapnik: true,
        cartodb: true
    };
}

/**
 * Tells whether this backend handles the given layer type.
 *
 * Only own entries of `_types` are considered, so names inherited from
 * Object.prototype (e.g. 'constructor', 'toString') are never reported as handled.
 *
 * @param {string} type - layer type name
 * @returns {*} the (truthy) value registered for `type`, or `false` when it is not registered
 */
MapnikLayerStats.prototype.is = function (type) {
    if (!Object.prototype.hasOwnProperty.call(this._types, type)) {
        return false;
    }
    return this._types[type] || false;
};
2019-10-22 01:07:24 +08:00
/**
 * Lists the SQL aggregate functions used to summarize a column or dimension.
 *
 * @param {{type: string}} field - descriptor whose `type` selects the aggregates
 * @returns {string[]} aggregate function names (a fresh array on every call);
 *   empty when no aggregate applies to the type
 */
function columnAggregations (field) {
    switch (field.type) {
    case 'number':
        return ['min', 'max', 'avg', 'sum'];
    case 'date': // TODO other types too?
    case 'timeDimension':
        return ['min', 'max'];
    default:
        return [];
    }
}
2018-05-08 01:03:19 +08:00
2019-10-22 01:07:24 +08:00
/**
 * Builds a SQL statement from one of the two layer queries held in the context.
 *
 * @param {Object} ctx - stats context; `preQuery` and `aggrQuery` are read
 * @param {Function} query - receives the zoom-substituted SQL and returns the statement to run
 * @param {string} [type='pre'] - 'pre' selects `ctx.preQuery`; any other value selects `ctx.aggrQuery`
 * @param {number} [zoom=0] - zoom passed to the token substitution; falsy values are treated as 0
 * @returns {*} whatever `query` returns
 */
function _getSQL (ctx, query, type = 'pre', zoom = 0) {
    const baseQuery = type === 'pre' ? ctx.preQuery : ctx.aggrQuery;
    const zoomedQuery = queryUtils.substituteTokensForZoom(baseQuery, zoom || 0);
    return query(zoomedQuery);
}
2018-05-12 01:32:03 +08:00
2019-10-22 01:07:24 +08:00
/**
 * Fetches the estimated row count of `ctx.preQuery`, using the statement
 * built by `queryUtils.getQueryRowEstimation`.
 *
 * @param {Object} ctx - stats context (`dbConnection` and the layer queries)
 * @returns {Promise<{estimatedFeatureCount: number}>} never rejects because of the
 *   query: any failure is reported as an estimation of -1
 */
function _estimatedFeatureCount (ctx) {
    const estimationSql = _getSQL(ctx, queryUtils.getQueryRowEstimation);
    return queryUtils.queryPromise(ctx.dbConnection, estimationSql)
        .then(res => ({ estimatedFeatureCount: res.rows[0].rows }))
        // Best effort: a failed (or empty) estimation is reported as -1.
        .catch(() => ({ estimatedFeatureCount: -1 }));
}
2018-05-08 01:03:19 +08:00
2019-10-22 01:07:24 +08:00
/**
 * Fetches the actual row count of `ctx.preQuery` when `metaOptions.featureCount` is set.
 *
 * @param {Object} ctx - stats context (`dbConnection`, layer queries and `metaOptions`)
 * @returns {Promise<{featureCount: *}|undefined>} resolves to undefined when the
 *   count was not requested; rejects if the query fails
 */
function _featureCount (ctx) {
    if (!ctx.metaOptions.featureCount) {
        return Promise.resolve();
    }
    // TODO: if ctx.metaOptions.columnStats we can combine this with column stats query
    const countSql = _getSQL(ctx, queryUtils.getQueryActualRowCount);
    return queryUtils.queryPromise(ctx.dbConnection, countSql)
        .then(res => ({ featureCount: res.rows[0].rows }));
}
2018-05-08 01:03:19 +08:00
2019-10-22 01:07:24 +08:00
/**
 * Fetches the row count of the aggregated query (`ctx.aggrQuery`) at a given zoom
 * when `metaOptions` has an own `aggrFeatureCount` entry.
 *
 * The presence check is done with `Object.prototype.hasOwnProperty.call`, so it
 * also works when `metaOptions` has no prototype or carries its own
 * `hasOwnProperty` key.
 *
 * @param {Object} ctx - stats context (`dbConnection`, layer queries and `metaOptions`)
 * @returns {Promise<{aggrFeatureCount: *}|undefined>} resolves to undefined when the
 *   count was not requested; rejects if the query fails
 */
function _aggrFeatureCount (ctx) {
    if (!Object.prototype.hasOwnProperty.call(ctx.metaOptions, 'aggrFeatureCount')) {
        return Promise.resolve();
    }
    // The value of aggrFeatureCount is expected to be the zoom level.
    // TODO: it'd be nice to admit an array of zoom levels to
    // return metadata for multiple levels.
    const zoom = ctx.metaOptions.aggrFeatureCount;
    const countSql = _getSQL(ctx, queryUtils.getQueryActualRowCount, 'post', zoom);
    return queryUtils.queryPromise(ctx.dbConnection, countSql)
        .then(res => ({ aggrFeatureCount: res.rows[0].rows }));
}
2019-10-22 01:07:24 +08:00
/**
 * Fetches the geometry type of `ctx.preQuery` when `metaOptions.geometryType` is set.
 *
 * @param {Object} ctx - stats context (`dbConnection`, layer queries and `metaOptions`)
 * @returns {Promise<{geometryType: *}|undefined>} resolves to undefined when not
 *   requested; `geometryType` is undefined when the query returns no rows
 */
function _geometryType (ctx) {
    if (!ctx.metaOptions.geometryType) {
        return Promise.resolve();
    }
    const geometryColumn = AggregationMapConfig.getAggregationGeometryColumn();
    const sqlQuery = _getSQL(ctx, sql => queryUtils.getQueryGeometryType(sql, geometryColumn));
    return queryUtils.queryPromise(ctx.dbConnection, sqlQuery)
        // Tolerate an empty result set: the first row may be missing.
        .then(res => ({ geometryType: (res.rows[0] || {}).geom_type }));
}
2019-10-22 01:07:24 +08:00
/**
 * Fetches the columns (name and type) of `ctx.preQuery` when any of the
 * `columns`, `columnStats` or `dimensions` metadata options is set.
 *
 * The query is run limited to 0 rows: only the returned field descriptors are used.
 *
 * @param {Object} ctx - stats context (`dbConnection`, layer queries and `metaOptions`)
 * @returns {Promise<Object|undefined>} resolves to the output of `formatResultFields`,
 *   or to undefined when columns are not needed
 */
function _columns (ctx) {
    const options = ctx.metaOptions;
    const columnsNeeded = options.columns || options.columnStats || options.dimensions;
    if (!columnsNeeded) {
        return Promise.resolve();
    }
    // note: post-aggregation columns are in layer.options.columns when aggregation is present
    const fieldsSql = _getSQL(ctx, sql => queryUtils.getQueryLimited(sql, 0));
    return queryUtils.queryPromise(ctx.dbConnection, fieldsSql)
        .then(res => formatResultFields(ctx.dbConnection, res.fields));
}
/**
 * Combines a list of result objects by merging the properties of all of them
 * (later entries win on key clashes). Undefined entries are admitted and ignored.
 *
 * @param {Array<Object|undefined>} [results]
 * @returns {Object|undefined} undefined when `results` is falsy or every entry is
 *   undefined; `{}` for an empty list. When only one entry is defined that very
 *   object is returned (not a copy).
 */
function mergeResults (results) {
    if (!results) {
        return undefined;
    }
    if (results.length === 0) {
        return {};
    }
    const combine = (merged, next) => {
        if (merged === undefined) {
            return next;
        }
        if (next === undefined) {
            return merged;
        }
        return Object.assign({}, merged, next);
    };
    // No initial value on purpose: the first entry seeds the accumulation.
    return results.reduce(combine);
}
/**
 * Deeper (1 level) combination of a list of objects:
 *
 *   mergeColumns([{ col1: { a: 1 }, col2: { a: 2 } }, { col1: { b: 3 } }])
 *   // => { col1: { a: 1, b: 3 }, col2: { a: 2 } }
 *
 * Falsy entries (e.g. undefined) are ignored wherever they appear in the list.
 * Neither the entries nor their nested objects are modified: a key present in
 * several entries is merged into a fresh object (later entries win on clashes).
 *
 * @param {Array<Object|undefined>} [results]
 * @returns {Object|undefined} a new merged object (`{}` when there is nothing to
 *   merge), or undefined when `results` itself is falsy
 */
function mergeColumns (results) {
    if (!results) {
        return undefined;
    }
    const hasOwn = Object.prototype.hasOwnProperty;
    return results.reduce((merged, entry) => {
        if (!entry) {
            return merged;
        }
        Object.keys(entry).forEach(key => {
            // Merge into a new object on clashes so no input is ever mutated.
            merged[key] = hasOwn.call(merged, key)
                ? Object.assign({}, merged[key], entry[key])
                : entry[key];
        });
        return merged;
    }, {});
}
// Number of rows to sample when the `sample` metadata option carries no (truthy) `num_rows`.
const DEFAULT_SAMPLE_ROWS = 100;
2019-10-22 01:07:24 +08:00
/**
 * Fetches a random sample of rows of `ctx.preQuery` when `metaOptions.sample` is set.
 *
 * It first queries the minimum id and the id span of the query, picks random
 * ids within that range and then fetches the rows having those ids.
 *
 * Options read from `metaOptions.sample`:
 *   - `num_rows`: number of ids to pick (DEFAULT_SAMPLE_ROWS when falsy)
 *   - `include_columns`: forwarded to `queryUtils.getSampleFromIdsQuery`
 *
 * @param {Object} ctx - stats context (`dbConnection`, layer queries and `metaOptions`)
 * @returns {Promise<{sample: *}|undefined>} resolves to undefined when no sample
 *   was requested; rejects if any of the queries fails
 */
function _sample (ctx) {
    const sampleOptions = ctx.metaOptions.sample;
    if (!sampleOptions) {
        return Promise.resolve();
    }

    const limit = sampleOptions.num_rows || DEFAULT_SAMPLE_ROWS;
    const columns = sampleOptions.include_columns;
    const sqlMaxMin = _getSQL(ctx, sql => queryUtils.getMaxMinSpanColumnQuery(sql));

    return queryUtils.queryPromise(ctx.dbConnection, sqlMaxMin)
        .then(maxMinRes => {
            const { min_id: min, id_span: span } = maxMinRes.rows[0];
            // NOTE(review): a minimum id of 0 is also treated as "no data" here, and the
            // empty sample is reported as an object rather than an array — confirm intended.
            if (!min || !span) {
                return { rows: {} };
            }
            const values = _getSampleValuesFromRange(min, span, limit);
            const sqlSample = _getSQL(ctx, sql => queryUtils.getSampleFromIdsQuery(sql, values, columns));
            return queryUtils.queryPromise(ctx.dbConnection, sqlSample);
        })
        .then(res => ({ sample: res.rows }));
}
2018-05-08 01:03:19 +08:00
2019-08-23 23:16:35 +08:00
/**
 * Picks distinct random integers from the range starting at `min` and spanning `span`.
 *
 * @param {number} min - lowest value that can be picked
 * @param {number} span - width of the range values are drawn from
 * @param {number} limit - how many values are wanted; capped to `span`
 * @returns {number[]} the picked values, in the order they were first drawn
 */
function _getSampleValuesFromRange (min, span, limit) {
    // There cannot be more distinct values than the width of the range.
    const wanted = limit < span ? limit : span;
    const picked = new Set();
    while (picked.size < wanted) {
        const offset = Math.random() * span;
        picked.add(Math.floor(min + offset));
    }
    return [...picked];
}
2019-10-22 01:07:24 +08:00
/**
 * Tells whether the columns of the query must be determined.
 *
 * That is the case if either column stats or dimension stats are required,
 * since the same query is ultimately used to fetch both.
 *
 * @param {Object} options - metadata options; `columnStats` and `dimensions` are read
 * @returns {*} `options.columnStats` when truthy, otherwise `options.dimensions`
 *   (the raw option value, not coerced to boolean)
 */
function _columnsMetadataRequired (options) {
    const { columnStats, dimensions } = options;
    return columnStats || dimensions;
}
2019-10-22 01:07:24 +08:00
/**
 * Computes per-column and per-dimension statistics for `ctx.preQuery`.
 *
 * When `metaOptions.columnStats` is set:
 *   - the aggregates given by `columnAggregations` are computed for every column;
 *   - for 'string' columns an additional query fetches the top categories
 *     (`columnStats.topCategories`, default 1024; nulls included unless
 *     `columnStats.includeNulls` says otherwise).
 * When `metaOptions.dimensions` is set and `dimensions` is given, aggregates are
 * also computed for the dimensions of type 'timeDimension'.
 * All the aggregates are fetched with one single query.
 *
 * @param {Object} ctx - stats context (`dbConnection`, layer queries and `metaOptions`)
 * @param {Object} [columns] - columns keyed by name, each one having a `type`
 * @param {Object} [dimensions] - dimensions, forwarded to `aggregationQuery.infoForOptions`
 * @returns {Promise<Object|undefined>} resolves to
 *   - undefined when `columns` is falsy;
 *   - `{ columns }` when neither column stats nor dimension stats are requested;
 *   - `{ columns, dimensions }` (merged with `mergeColumns`) otherwise.
 *   Rejects if any of the queries fails.
 */
function _columnStats (ctx, columns, dimensions) {
    if (!columns) {
        return Promise.resolve();
    }
    if (!_columnsMetadataRequired(ctx.metaOptions)) {
        return Promise.resolve({ columns });
    }

    const queries = [];
    let aggr = [];

    const columnStatsOptions = ctx.metaOptions.columnStats;
    if (columnStatsOptions) {
        queries.push(Promise.resolve({ columns })); // add columns as first result

        const topN = columnStatsOptions.topCategories || 1024;
        const includeNulls = Object.prototype.hasOwnProperty.call(columnStatsOptions, 'includeNulls')
            ? columnStatsOptions.includeNulls
            : true;

        Object.keys(columns).forEach(name => {
            aggr = aggr.concat(
                columnAggregations(columns[name]).map(fn => `${fn}("${name}") AS "${name}_${fn}"`)
            );
            if (columns[name].type === 'string') {
                // TODO: ctx.metaOptions.columnStats.maxCategories
                //       => use PG stats to dismiss columns with more distinct values
                queries.push(
                    queryUtils.queryPromise(
                        ctx.dbConnection,
                        _getSQL(ctx, sql => queryUtils.getQueryTopCategories(sql, name, topN, includeNulls))
                    ).then(res => ({ columns: { [name]: { categories: res.rows } } }))
                );
            }
        });
    }

    const dimensionsStats = {};
    let dimensionsInfo = {};
    if (ctx.metaOptions.dimensions && dimensions) {
        dimensionsInfo = aggregationQuery.infoForOptions({ dimensions });
        Object.keys(dimensionsInfo).forEach(dimName => {
            const info = dimensionsInfo[dimName];
            if (info.type === 'timeDimension') {
                dimensionsStats[dimName] = {
                    params: info.params
                };
                aggr = aggr.concat(
                    columnAggregations(info).map(fn => `${fn}(${info.sql}) AS "${dimName}_${fn}"`)
                );
            }
        });
    }

    // Single query computing every aggregate collected above.
    queries.push(
        queryUtils.queryPromise(
            ctx.dbConnection,
            _getSQL(ctx, sql => `SELECT ${aggr.join(',')} FROM (${sql}) AS __cdb_query`)
        ).then(res => {
            const stats = { columns: {}, dimensions: {} };
            // NOTE(review): column aggregates are only part of the query when columnStats
            // is set; with dimensions alone these lookups yield undefined — confirm intended.
            Object.keys(columns).forEach(name => {
                stats.columns[name] = {};
                columnAggregations(columns[name]).forEach(fn => {
                    stats.columns[name][fn] = res.rows[0][`${name}_${fn}`];
                });
            });
            Object.keys(dimensionsInfo).forEach(name => {
                stats.dimensions[name] = stats.dimensions[name] || Object.assign({}, dimensionsStats[name]);
                // The dimension type is taken from the first aggregate found in the result fields.
                let type = null;
                columnAggregations(dimensionsInfo[name]).forEach(fn => {
                    type = type ||
                        fieldTypeSafe(ctx.dbConnection, res.fields.find(f => f.name === `${name}_${fn}`));
                    stats.dimensions[name][fn] = res.rows[0][`${name}_${fn}`];
                });
                stats.dimensions[name].type = type;
            });
            return stats;
        })
    );

    return Promise.all(queries).then(results => ({
        columns: mergeColumns(results.map(r => r.columns)),
        dimensions: mergeColumns(results.map(r => r.dimensions))
    }));
}
2018-05-08 01:03:19 +08:00
2018-05-09 02:07:20 +08:00
/**
 * Maps a database type name to the simplified type name reported in the stats.
 * This is adapted from SQL API.
 *
 * The patterns are substring tests applied in order and the first match wins;
 * so, for instance, any name containing "int" is reported as 'number'.
 * Names matching no pattern are returned as they are, and names starting with
 * an underscore get a '[]' suffix appended to the mapped type.
 *
 * @param {string} cname - database type name
 * @returns {string} 'boolean', 'number', 'string', 'date' or `cname` itself,
 *   with '[]' appended when `cname` starts with '_'
 */
function fieldType (cname) {
    const rules = [
        [/bool/, 'boolean'],
        [/int|float|numeric/, 'number'],
        [/text|char|unknown/, 'string'],
        [/date|time/, 'date']
    ];
    const matched = rules.find(([pattern]) => pattern.test(cname));
    let tname = matched ? matched[1] : cname;
    if (tname && cname.match(/^_/)) {
        tname += '[]';
    }
    return tname;
}
2019-10-22 01:07:24 +08:00
/**
 * Resolves the simplified type of a result field, coping with data type ids
 * the connection cannot name.
 *
 * @param {Object} dbConnection - must provide `typeName(dataTypeID)`
 * @param {{dataTypeID: *}} field - result field descriptor
 * @returns {string} the output of `fieldType` for the resolved type name, or
 *   `unknown(<dataTypeID>)` when the connection returns no (truthy) name
 */
function fieldTypeSafe (dbConnection, field) {
    const cname = dbConnection.typeName(field.dataTypeID);
    if (!cname) {
        return `unknown(${field.dataTypeID})`;
    }
    return fieldType(cname);
}
/**
 * Formats the field descriptors of a query result.
 *
 * Columns are returned as an object `{ columnName1: { type: type1 }, ... }`
 * for consistency with SQL API.
 *
 * @param {Object} dbConnection - forwarded to `fieldTypeSafe` to resolve type names
 * @param {Iterable<{name: string, dataTypeID: *}>} [fields=[]] - result fields
 * @returns {Object} one `{ type }` entry per field, keyed by field name
 */
function formatResultFields (dbConnection, fields = []) {
    const columnsByName = {};
    for (const field of fields) {
        const type = fieldTypeSafe(dbConnection, field);
        columnsByName[field.name] = { type };
    }
    return columnsByName;
}
/**
 * Computes the stats of a layer as requested by its `options.metadata`.
 *
 * The independent stats queries run concurrently; the sample is fetched once the
 * estimated feature count has been obtained, and column stats once the columns
 * are known. All partial results are merged into one single object.
 *
 * `callback` is invoked exactly once by this method: with `(null, stats)` on
 * success or with `(error)` when any query fails. An exception thrown by the
 * callback itself is not fed back into the callback.
 *
 * @param {Object} layer - layer definition; `options.sql` is the (possibly aggregated)
 *   query, `options.sql_raw` the query before aggregation (defaults to `options.sql`),
 *   `options.metadata` the requested stats, `options.aggregation.dimensions` the dimensions
 * @param {Object} dbConnection - connection used to run the queries
 * @param {Function} callback - node-style callback `(error, stats)`
 */
MapnikLayerStats.prototype.getStats = function (layer, dbConnection, callback) {
    const aggrQuery = layer.options.sql;
    const preQuery = layer.options.sql_raw || aggrQuery;

    const ctx = {
        dbConnection,
        preQuery,
        aggrQuery,
        metaOptions: layer.options.metadata || {}
    };

    // TODO: could save some queries if queryUtils.getAggregationMetadata() has been used and kept somewhere
    // we would set queries.results.estimatedFeatureCount and queries.results.geometryType
    // (if metaOptions.geometryType) from it.

    // TODO: compute _sample with _featureCount when available
    // TODO: add support for sample.exclude option by, in that case, forcing the columns query and
    // passing the results to the sample query function.

    const dimensions = (layer.options.aggregation || {}).dimensions;

    const estimationAndSample = _estimatedFeatureCount(ctx).then(
        ({ estimatedFeatureCount }) => _sample(ctx)
            .then(sampleResults => mergeResults([sampleResults, { estimatedFeatureCount }]))
    );

    Promise.all([
        estimationAndSample,
        _featureCount(ctx),
        _aggrFeatureCount(ctx),
        _geometryType(ctx),
        _columns(ctx).then(columns => _columnStats(ctx, columns, dimensions))
    ])
        .then(results => mergeResults(results))
        // Two-argument form: an exception thrown by the success callback must not
        // trigger a second invocation of the callback with that exception.
        .then(
            stats => callback(null, stats),
            error => callback(error)
        );
};
// The MapnikLayerStats constructor is the module's only export.
module.exports = MapnikLayerStats;