Windshaft-cartodb/lib/cartodb/backends/layer-stats/mapnik-layer-stats.js

184 lines
6.6 KiB
JavaScript
Raw Normal View History

2017-05-18 17:51:12 +08:00
var queryUtils = require('../../utils/query-utils');
const PhasedExecution = require('../../utils/phased-execution');
2018-05-08 01:03:19 +08:00
const AggregationMapConfig = require('../../models/aggregation/aggregation-mapconfig');
/**
 * Layer-stats backend; each instance records in `_types` the layer types
 * it declares itself able to handle.
 */
function MapnikLayerStats () {
    this._types = {
        mapnik: true,
        cartodb: true
    };
}

/**
 * Tells whether this backend handles the given layer type.
 *
 * @param {string} type - layer type name to look up
 * @returns {boolean} the value registered for `type` in `_types`, or `false`
 *     when the type is not registered
 */
MapnikLayerStats.prototype.is = function (type) {
    const types = this._types;
    // Only own entries count: a plain lookup would also find members inherited
    // from Object.prototype (e.g. 'constructor', 'toString') and return them.
    const registered = Object.prototype.hasOwnProperty.call(types, type);
    return registered && types[type] ? types[type] : false;
};
2018-05-08 01:03:19 +08:00
/**
 * Runs `query` through `dbConnection.query` and exposes completion as a Promise.
 *
 * `callback` receives the `(err, res)` pair produced by the connection and
 * decides the outcome: a truthy return value rejects the promise with that
 * value, a falsy one resolves it (with no value). This lets a caller store
 * results as a side effect and choose whether an error is fatal.
 *
 * @param {Object} dbConnection - object exposing `query(sql, function (err, res))`
 * @param {string} query - SQL text to execute
 * @param {Function} callback - `(err, res) => error-or-falsy`
 * @returns {Promise<undefined>} settles once the query callback has run
 */
function queryPromise(dbConnection, query, callback) {
    return new Promise(function (resolve, reject) {
        dbConnection.query(query, function (err, res) {
            let failure;
            try {
                failure = callback(err, res);
            } catch (thrown) {
                // An exception raised while handling the result (e.g. reading a
                // row that is not there) would otherwise escape the driver
                // callback and leave this promise pending forever.
                reject(thrown);
                return;
            }
            if (failure) {
                reject(failure);
            } else {
                resolve();
            }
        });
    });
}
/**
 * Lists the SQL aggregate function names to compute for a column.
 *
 * @param {Object} field - column descriptor; only its `type` property is read
 * @returns {string[]} aggregate names for 'number' and 'date' columns, and an
 *     empty array for any other type so callers can always iterate the result
 */
function columnAggregations(field) {
    if (field.type === 'number') {
        return ['min', 'max', 'avg', 'sum'];
    }
    if (field.type === 'date') { // TODO other types too?
        return ['min', 'max'];
    }
    // No aggregations apply (e.g. 'string'); callers use .map/.forEach on the
    // result, so return an empty list rather than undefined.
    return [];
}
2018-05-08 01:03:19 +08:00
/**
 * Registers the first-phase statistics queries as tasks on `queries`.
 *
 * Each task stores its outcome in `queries.results`:
 *  - `estimatedFeatureCount`: always requested unless already present;
 *    set to -1 when the estimation query fails (the failure is not propagated).
 *  - `featureCount`: when `ctx.metaOptions.featureCount` is set; -1 on error.
 *  - `geometryType`: when `ctx.metaOptions.geometryType` is set and not already present.
 *  - `columns`: when `ctx.metaOptions.columns` or `ctx.metaOptions.columnStats` is set.
 *
 * @param {Object} queries - execution object exposing `results` and `task(promise)`
 * @param {Object} ctx - context with `dbConnection`, `query`, `rawQuery` and `metaOptions`
 */
function firstPhaseQueries(queries, ctx) {
    if (queries.results.estimatedFeatureCount === undefined) {
        queries.task(
            queryPromise(ctx.dbConnection, queryUtils.getQueryRowEstimation(ctx.query), function (err, res) {
                if (err) {
                    // at least for debugging we should err
                    queries.results.estimatedFeatureCount = -1;
                } else {
                    // We decided that the relation is 1 row == 1 feature
                    queries.results.estimatedFeatureCount = res.rows[0].rows;
                }
                // A failed estimation is recorded as -1 but never fails the task.
                return null;
            })
        );
    }

    if (ctx.metaOptions.featureCount) {
        // TODO: if ctx.metaOptions.columnStats we can combine this with column stats query
        queries.task(
            queryPromise(ctx.dbConnection, queryUtils.getQueryActualRowCount(ctx.rawQuery), function (err, res) {
                if (err) {
                    queries.results.featureCount = -1;
                } else {
                    queries.results.featureCount = res.rows[0].rows;
                }
                return err;
            })
        );
    }

    if (ctx.metaOptions.geometryType && queries.results.geometryType === undefined) {
        const geometryColumn = AggregationMapConfig.getAggregationGeometryColumn();
        const geometryTypeQuery = queryUtils.getQueryGeometryType(ctx.rawQuery, geometryColumn);
        queries.task(
            queryPromise(ctx.dbConnection, geometryTypeQuery, function (err, res) {
                // Like the other results, the value lives in the first returned
                // row; with no rows the geometry type is simply left unset.
                if (!err && res.rows && res.rows.length > 0) {
                    queries.results.geometryType = res.rows[0].geom_type;
                }
                return err;
            })
        );
    }

    if (ctx.metaOptions.columns || ctx.metaOptions.columnStats) {
        queries.task(
            // TODO: note we have getLayerColumns in aggregation mapconfig.
            // and also getLayerAggregationColumns which either uses getLayerColumns or derives columns from parameters
            queryPromise(ctx.dbConnection, queryUtils.getQueryLimited(ctx.rawQuery, 0), function (err, res) {
                if (!err) {
                    queries.results.columns = res.fields;
                }
                return err;
            })
        );
    }
}
/**
 * Registers the second-phase statistics queries as tasks on `queries`.
 * These depend on values stored in `queries.results` by the first phase.
 *
 *  - `sample`: when `ctx.metaOptions.sample` is set, rows sampled with
 *    probability `sample / numRows` (capped at 1), where `numRows` is
 *    `featureCount` if present, otherwise `estimatedFeatureCount`.
 *    Stored as `[]` on error.
 *  - column stats: when `ctx.metaOptions.columnStats` is set, each column in
 *    `queries.results.columns` gets its aggregate values (one property per
 *    aggregate name) and, for 'string' columns, `categories`.
 *
 * @param {Object} queries - execution object exposing `results` and `task(promise)`
 * @param {Object} ctx - context with `dbConnection`, `rawQuery` and `metaOptions`
 */
function secondPhaseQueries(queries, ctx) {
    if (ctx.metaOptions.sample) {
        const numRows = queries.results.featureCount === undefined ?
            queries.results.estimatedFeatureCount :
            queries.results.featureCount;
        // NOTE(review): numRows may be -1 (failed count) or 0, giving a negative
        // or capped probability; confirm how getQuerySample treats those values.
        const sampleProb = Math.min(ctx.metaOptions.sample / numRows, 1);
        queries.task(
            queryPromise(ctx.dbConnection, queryUtils.getQuerySample(ctx.rawQuery, sampleProb), function (err, res) {
                if (err) {
                    queries.results.sample = [];
                } else {
                    queries.results.sample = res.rows;
                }
                return err;
            })
        );
    }

    if (ctx.metaOptions.columnStats) {
        const columns = queries.results.columns || {};
        // Tolerate a columnAggregations() that yields nothing for a column type.
        const aggregationsOf = name => columnAggregations(columns[name]) || [];
        const topN = ctx.metaOptions.columnStats.topCategories || 1024;
        let aggr = [];

        Object.keys(columns).forEach(name => {
            // NOTE(review): column names are interpolated unquoted into SQL;
            // confirm they are always safe identifiers.
            aggr = aggr.concat(aggregationsOf(name).map(fn => `${fn}(${name}) AS ${name}_${fn}`));
            if (columns[name].type === 'string') {
                // TODO: ctx.metaOptions.columnStats.maxCategories
                //       => use PG stats to dismiss columns with more distinct values
                const categoriesQuery = queryUtils.getQueryTopCategories(ctx.rawQuery, name, topN);
                queries.task(
                    queryPromise(ctx.dbConnection, categoriesQuery, function (err, res) {
                        if (!err) {
                            columns[name].categories = res.rows;
                        }
                        return err;
                    })
                );
            }
        });

        // Skip the aggregate query altogether when no column needs one.
        if (aggr.length > 0) {
            // PostgreSQL (before version 16) requires an alias on a subquery in FROM.
            const aggregatesQuery = `SELECT ${aggr.join(',')} FROM (${ctx.rawQuery}) AS _cdb_column_stats`;
            queries.task(
                queryPromise(ctx.dbConnection, aggregatesQuery, function (err, res) {
                    if (!err) {
                        Object.keys(columns).forEach(name => {
                            aggregationsOf(name).forEach(fn => {
                                columns[name][fn] = res.rows[0][`${name}_${fn}`];
                            });
                        });
                    }
                    return err;
                })
            );
        }
    }
}
2018-05-08 01:03:19 +08:00
/**
 * Computes the statistics requested for a layer and hands them to `callback`.
 *
 * @param {Object} layer - layer definition; reads `options.sql`, `options.sql_raw`
 *     (used as the raw query when present, otherwise `options.sql`) and
 *     `options.metadata` (defaults to `{}`)
 * @param {Object} dbConnection - connection passed along to the statistics queries
 * @param {Function} callback - invoked as `callback(null, results)` on success
 *     or `callback(error)` on failure
 */
MapnikLayerStats.prototype.getStats =
function (layer, dbConnection, callback) {
    const context = {
        dbConnection,
        query: layer.options.sql,
        rawQuery: layer.options.sql_raw ? layer.options.sql_raw : layer.options.sql,
        metaOptions: layer.options.metadata || {}
    };

    const queries = new PhasedExecution();

    // TODO: could save some queries if queryUtils.getAggregationMetadata() has been used and kept somewhere
    // we would set queries.results.estimatedFeatureCount and queries.results.geometryType
    // (if metaOptions.geometryType) from it.

    // Queries will be executed in two phases, with results from the first phase needed
    // to define the queries of the second phase.
    // Every query stores its outcome in the shared queries.results object.
    queries.phase(() => firstPhaseQueries(queries, context));
    queries.phase(() => secondPhaseQueries(queries, context));
    queries.run(results => callback(null, results)).catch(error => callback(error));
};
// Expose the MapnikLayerStats constructor as this module's export.
module.exports = MapnikLayerStats;