WIP: add layer metadata

This commit is contained in:
Javier Goizueta 2018-05-07 19:03:19 +02:00
parent 20b46a33cf
commit 7561635b24
2 changed files with 242 additions and 9 deletions

View File

@ -1,4 +1,5 @@
var queryUtils = require('../../utils/query-utils');
const AggregationMapConfig = require('../../models/aggregation/aggregation-mapconfig');
function MapnikLayerStats () {
this._types = {
@ -11,18 +12,169 @@ MapnikLayerStats.prototype.is = function (type) {
return this._types[type] ? this._types[type] : false;
};
/**
 * Runs `query` through `dbConnection.query` and exposes the outcome as a Promise.
 *
 * The node-style result `(err, res)` is first handed to `callback`, which is expected to
 * record whatever it needs and return either a falsy value (the promise resolves, with no
 * value) or an error (the promise rejects with it). This lets a handler downgrade a
 * query failure to a non-fatal one by returning `null`.
 *
 * @param {Object} dbConnection - object exposing `query(sql, function (err, res))`
 * @param {string} query - SQL to execute
 * @param {function(*, *): *} callback - result handler; its return value decides the outcome
 * @returns {Promise<undefined>} settles once the handler has processed the result
 */
function queryPromise(dbConnection, query, callback) {
    return new Promise(function (resolve, reject) {
        dbConnection.query(query, function (err, res) {
            let failure;
            try {
                failure = callback(err, res);
            } catch (thrown) {
                // A handler that throws (e.g. reading a row that is not there) must reject
                // the promise; otherwise the exception escapes the driver callback and the
                // promise stays pending forever.
                reject(thrown);
                return;
            }
            if (failure) {
                reject(failure);
            } else {
                resolve();
            }
        });
    });
}
/**
 * Collects metadata about a layer's query and reports it through `callback(err, stats)`.
 *
 * `stats.estimatedFeatureCount` is always requested; if that query fails it is set to -1
 * and the failure is NOT treated as an error. Depending on `layer.options.metadata`, the
 * following are added as well (any failure of these is passed to `callback`):
 *   - `featureCount`  when `metadata.featureCount`: exact row count of the raw query
 *   - `geometryType`  when `metadata.geometryType`: type of one non-null geometry
 *   - `columns`       when `metadata.columns` or `metadata.columnStats`: result `fields`
 *   - per-column aggregates and `categories` when `metadata.columnStats`
 *   - `sample`        when `metadata.sample`: rows picked with probability sample/rowCount
 *
 * Queries run in two stages. Stage two (sample and column statistics) is only built once
 * every stage-one query has finished, because it needs the row count and column list.
 *
 * @param {Object} layer - layer definition; reads `options.sql`, `options.sql_raw`, `options.metadata`
 * @param {Object} dbConnection - object exposing `query(sql, function (err, res))`
 * @param {function(*, Object=)} callback - receives `(null, stats)` or `(err)`
 */
MapnikLayerStats.prototype.getStats = function (layer, dbConnection, callback) {
    const query = layer.options.sql;
    const rawQuery = layer.options.sql_raw ? layer.options.sql_raw : layer.options.sql;
    const metaOptions = layer.options.metadata || {};
    // Shared result object: every query handler stores its findings here.
    const stats = {};

    // TODO: could save some queries if queryUtils.getAggregationMetadata() has been used and kept somewhere;
    // stats.estimatedFeatureCount and stats.geometryType could then be taken from it.

    const run = (sql, onResult) => queryPromise(dbConnection, sql, onResult);

    // Aggregate functions to compute for a column; none for types without meaningful aggregates.
    function columnAggregations(field) {
        if (field.type === 'number') {
            return ['min', 'max', 'avg', 'sum'];
        }
        if (field.type === 'date') { // TODO other types too?
            return ['min', 'max'];
        }
        return [];
    }

    const firstStage = [];

    firstStage.push(run(queryUtils.getQueryRowEstimation(query), (err, res) => {
        // We decided that the relation is 1 row == 1 feature.
        // A failed estimation is reported as -1 instead of failing the whole request.
        stats.estimatedFeatureCount = (!err && res.rows[0]) ? res.rows[0].rows : -1;
        return null;
    }));

    if (metaOptions.featureCount) {
        // TODO: if metaOptions.columnStats we can combine this with column stats query
        firstStage.push(run(queryUtils.getQueryActualRowCount(rawQuery), (err, res) => {
            stats.featureCount = (!err && res.rows[0]) ? res.rows[0].rows : -1;
            return err;
        }));
    }

    if (metaOptions.geometryType) {
        const geometryColumn = AggregationMapConfig.getAggregationGeometryColumn();
        firstStage.push(run(queryUtils.getQueryGeometryType(rawQuery, geometryColumn), (err, res) => {
            // The query yields no row when every geometry is NULL; geometryType then stays unset.
            if (!err && res.rows.length > 0) {
                stats.geometryType = res.rows[0].geom_type;
            }
            return err;
        }));
    }

    if (metaOptions.columns || metaOptions.columnStats) {
        // TODO: note we have getLayerColumns in aggregation mapconfig, and also
        // getLayerAggregationColumns which either uses getLayerColumns or derives columns from parameters
        firstStage.push(run(queryUtils.getQueryLimited(rawQuery, 0), (err, res) => {
            if (!err) {
                // NOTE(review): the code below assumes `res.fields` is an object keyed by column
                // name whose values carry a `type` of 'number' | 'date' | 'string' — confirm
                // against the connection wrapper in use.
                stats.columns = res.fields;
            }
            return err;
        }));
    }

    // Builds the queries that depend on stage-one results (row count, column list).
    function buildSecondStage() {
        const secondStage = [];

        if (metaOptions.sample) {
            const numRows = stats.featureCount === undefined ? stats.estimatedFeatureCount : stats.featureCount;
            if (numRows >= 0) {
                const sampleProb = Math.min(metaOptions.sample / numRows, 1);
                secondStage.push(run(queryUtils.getQuerySample(rawQuery, sampleProb), (err, res) => {
                    stats.sample = err ? [] : res.rows;
                    return err;
                }));
            } else {
                // Row count unknown (estimation failed): no sensible probability can be derived.
                stats.sample = [];
            }
        }

        if (metaOptions.columnStats && stats.columns) {
            // TODO: metaOptions.columnStats.maxCategories => use PG stats to dismiss columns
            // with more distinct values
            const topN = metaOptions.columnStats.topCategories || 1024;
            const columnNames = Object.keys(stats.columns);
            const aggregations = [];

            // NOTE(review): column names are interpolated unquoted into SQL, as before.
            columnNames.forEach(name => {
                columnAggregations(stats.columns[name]).forEach(fn => {
                    aggregations.push(`${fn}(${name}) AS ${name}_${fn}`);
                });
                if (stats.columns[name].type === 'string') {
                    secondStage.push(run(queryUtils.getQueryTopCategories(rawQuery, name, topN), (err, res) => {
                        if (!err) {
                            stats.columns[name].categories = res.rows;
                        }
                        return err;
                    }));
                }
            });

            if (aggregations.length > 0) {
                const aggregationSql = `SELECT ${aggregations.join(',')} FROM (${rawQuery}) AS __cdb_query`;
                secondStage.push(run(aggregationSql, (err, res) => {
                    if (!err) {
                        columnNames.forEach(name => {
                            columnAggregations(stats.columns[name]).forEach(fn => {
                                stats.columns[name][fn] = res.rows[0][`${name}_${fn}`];
                            });
                        });
                    }
                    return err;
                }));
            }
        }

        return secondStage;
    }

    // Success and failure handlers are passed together so that an exception thrown by
    // `callback` itself cannot cause `callback` to be invoked a second time.
    Promise.all(firstStage)
        .then(() => Promise.all(buildSecondStage()))
        .then(() => callback(null, stats), err => callback(err));
};
module.exports = MapnikLayerStats;

View File

@ -71,3 +71,84 @@ module.exports.countNaNs = function countNaNs(ctx) {
`sum(CASE WHEN (${ctx.column} = 'NaN'::float) THEN 1 ELSE 0 END)`
}`;
};
/**
 * Builds the SQL that ranks the values of `column` in `query` by how often they occur.
 *
 * @param {string} query - SQL used as the data source (wrapped as a subquery)
 * @param {string} column - column whose values are counted
 * @param {number} topN - maximum number of categories returned
 * @param {boolean} [includeNulls=false] - when false, NULL values are filtered out
 * @returns {string} SQL yielding `category` and `frequency`, most frequent first
 */
module.exports.getQueryTopCategories = function(query, column, topN, includeNulls=false) {
    let nullFilter;
    if (includeNulls) {
        nullFilter = '';
    } else {
        nullFilter = `WHERE ${column} IS NOT NULL`;
    }
    const statementLines = [
        '',
        `SELECT ${column} AS category, COUNT(*) AS frequency`,
        `FROM (${query}) AS __cdb_query`,
        nullFilter,
        `GROUP BY ${column} ORDER BY 2 DESC`,
        `LIMIT ${topN}`,
        ''
    ];
    return statementLines.join('\n');
};
/**
 * Builds the SQL that counts the rows produced by `query`.
 *
 * @param {string} query - SQL used as the data source (wrapped as a subquery)
 * @returns {string} SQL yielding a single row with the count in column `rows`
 */
module.exports.getQueryActualRowCount = function (query) {
    // Must be a template literal: inside plain quotes `${query}` is emitted verbatim.
    return `select COUNT(*) AS rows FROM (${query}) AS __cdb_query`;
};
/**
 * Builds the SQL that returns a random sample of the rows of `query`.
 *
 * When `query` is a plain `SELECT ... FROM table` (as recognised by `simpleQueryTable`)
 * the table is sampled directly via `getTableSample`; otherwise every row of the query
 * is kept with probability `sampleProb`.
 *
 * @param {string} query - SQL used as the data source
 * @param {number} sampleProb - probability (0..1) of keeping each row
 * @param {number} [randomSeed=0.5] - seed intended to make the sample repeatable
 * @returns {string} sampling SQL
 */
module.exports.getQuerySample = function(query, sampleProb, randomSeed = 0.5) {
    const table = simpleQueryTable(query);
    if (table) {
        // getTableSample only exists as an export, so it has to be reached through module.exports.
        return module.exports.getTableSample(table, sampleProb, randomSeed);
    }
    // NOTE(review): __cdb_rndseed is never referenced by the main SELECT; confirm that
    // PostgreSQL actually evaluates it (and therefore applies the seed) in this form.
    return `
        WITH __cdb_rndseed AS (
            SELECT setseed(${randomSeed})
        )
        SELECT *
        FROM (${query}) AS __cdb_query
        WHERE random() < ${sampleProb}
    `;
};
/**
 * Builds a TABLESAMPLE BERNOULLI query over `table`.
 *
 * @param {string} table - table name, inserted into the SQL as given
 * @param {number} sampleProb - sampling fraction; multiplied by 100 for the BERNOULLI argument
 * @param {number} randomSeed - seed; multiplied by 2^31 - 1 for the REPEATABLE argument
 * @returns {string} sampling SQL
 */
module.exports.getTableSample = function(table, sampleProb, randomSeed) {
    const percentage = sampleProb * 100;
    const maxSeed = Math.pow(2, 31) - 1;
    const repeatableSeed = randomSeed * maxSeed;
    const statement =
        `SELECT * FROM ${table} TABLESAMPLE BERNOULLI (${percentage}) REPEATABLE (${repeatableSeed})`;
    return ['', statement, ''].join('\n');
};
/**
 * Detects queries that merely select columns from a single table, either directly
 * (`SELECT ... FROM [schema.]table`) or in the wrapped form used for named maps
 * (`SELECT * FROM (<simple query>) AS wrapped_query WHERE <n>=1`).
 *
 * @param {string} sql - query to inspect
 * @returns {string|false} the table name exactly as written in the query (schema-qualified
 *     when the query qualifies it, quotes preserved), or `false` for any other query
 */
function simpleQueryTable(sql) {
    const basicQuery =
        /\s*SELECT\s+[\*a-z0-9_,\s]+?\s+FROM\s+((\"[^"]+\"|[a-z0-9_]+)\.)?(\"[^"]+\"|[a-z0-9_]+)\s*;?\s*/i;
    const unwrappedQuery = new RegExp("^"+basicQuery.source+"$", 'i');
    // queries for named maps are wrapped like this:
    const wrappedQuery = new RegExp(
        "^\\s*SELECT\\s+\\*\\s+FROM\\s+\\(" +
        basicQuery.source +
        "\\)\\s+AS\\s+wrapped_query\\s+WHERE\\s+\\d+=1\\s*$",
        'i'
    );
    const match = sql.match(unwrappedQuery) || sql.match(wrappedQuery);
    if (!match) {
        return false;
    }
    // Declared locally: assigning undeclared names leaks globals and throws in strict mode.
    const schema = match[2];
    const table = match[3];
    return schema ? `${schema}.${table}` : table;
}
/**
 * Builds the SQL that reports the geometry type of one non-null geometry of `query`.
 *
 * @param {string} query - SQL used as the data source (wrapped as a subquery)
 * @param {string} geometryColumn - name of the geometry column to inspect
 * @returns {string} SQL yielding at most one row with column `geom_type`
 */
module.exports.getQueryGeometryType = function(query, geometryColumn) {
    const selectClause = `SELECT ST_GeometryType(${geometryColumn}) AS geom_type`;
    const fromClause = `FROM (${query}) AS __cdb_query`;
    const whereClause = `WHERE ${geometryColumn} IS NOT NULL`;
    return '\n' + selectClause + '\n' + fromClause + '\n' + whereClause + '\n' + 'LIMIT 1' + '\n';
};
/**
 * Wraps `query` so that it returns at most `limit` rows.
 *
 * @param {string} query - SQL used as the data source (wrapped as a subquery)
 * @param {number} [limit=0] - value placed in the LIMIT clause
 * @returns {string} the limited SQL
 */
module.exports.getQueryLimited = function(query, limit=0) {
    const statementLines = [
        '',
        'SELECT *',
        `FROM (${query}) AS __cdb_query`,
        `LIMIT ${limit}`,
        ''
    ];
    return statementLines.join('\n');
};