WIP:add layer metadata
This commit is contained in:
parent
20b46a33cf
commit
7561635b24
@ -1,4 +1,5 @@
|
||||
var queryUtils = require('../../utils/query-utils');
|
||||
const AggregationMapConfig = require('../../models/aggregation/aggregation-mapconfig');
|
||||
|
||||
function MapnikLayerStats () {
|
||||
this._types = {
|
||||
@ -11,18 +12,169 @@ MapnikLayerStats.prototype.is = function (type) {
|
||||
return this._types[type] ? this._types[type] : false;
|
||||
};
|
||||
|
||||
/**
 * Runs `query` on `dbConnection` and exposes the outcome as a Promise.
 *
 * The driver result is not passed through the promise; instead `callback`
 * receives `(err, res)` and is expected to store whatever it needs itself.
 * The value returned by `callback` decides how the promise settles:
 * a truthy value rejects the promise with that value, anything else resolves it
 * (so a handler can deliberately swallow a query error by returning a falsy value).
 *
 * @param {Object} dbConnection - object exposing `query(sql, function (err, res))`
 * @param {string} query - SQL to execute
 * @param {function(*, *): *} callback - result handler; return an error to reject
 * @returns {Promise<undefined>} resolves with no value once `callback` has run
 */
function queryPromise(dbConnection, query, callback) {
    return new Promise(function (resolve, reject) {
        dbConnection.query(query, function (err, res) {
            let failure;
            try {
                failure = callback(err, res);
            } catch (thrown) {
                // The driver may call us asynchronously, outside the Promise executor;
                // without this an exception in the handler would be uncaught and
                // the promise would never settle.
                reject(thrown);
                return;
            }

            if (failure) {
                reject(failure);
            } else {
                resolve();
            }
        });
    });
}
|
||||
|
||||
/**
 * Names of the SQL aggregate functions to compute for a column.
 *
 * @param {{type: string}} field - column descriptor
 * @returns {string[]} aggregate function names; empty for types without aggregations
 */
function columnAggregations(field) {
    if (field.type === 'number') {
        return ['min', 'max', 'avg', 'sum'];
    }
    if (field.type === 'date') { // TODO other types too?
        return ['min', 'max'];
    }
    return [];
}

/**
 * Quotes a name as a PostgreSQL identifier (embedded double quotes are doubled).
 *
 * @param {string} name
 * @returns {string}
 */
function quoteIdentifier(name) {
    return `"${String(name).replace(/"/g, '""')}"`;
}

/**
 * Collects statistics/metadata for a layer and passes them to `callback(err, stats)`.
 *
 * `stats.estimatedFeatureCount` is always computed (-1 if its query fails).
 * The remaining entries depend on `layer.options.metadata`:
 *  - `featureCount`: `stats.featureCount`, actual row count
 *  - `sample`: `stats.sample`, rows sampled with probability `sample / <row count>`
 *  - `geometryType`: `stats.geometryType`
 *  - `columns` / `columnStats`: `stats.columns`; with `columnStats` also per-column
 *    aggregations and, for 'string' columns, `categories`
 *
 * Queries run in two stages: the second stage (sample, column statistics) is only
 * started once every first-stage query has completed, because it uses their results.
 * Apart from the row estimation, a failing query makes the whole call fail.
 *
 * @param {Object} layer - layer whose `options.sql` (and optionally `options.sql_raw`,
 *     `options.metadata`) are read
 * @param {Object} dbConnection - object exposing `query(sql, function (err, res))`
 * @param {function(*, Object=)} callback
 */
MapnikLayerStats.prototype.getStats = function (layer, dbConnection, callback) {
    const query = layer.options.sql;
    const rawQuery = layer.options.sql_raw ? layer.options.sql_raw : layer.options.sql;
    const metaOptions = layer.options.metadata || {};

    // Query handlers store their results in this shared object.
    const stats = {};

    // TODO: could save some queries if queryUtils.getAggregationMetadata() has been used and kept somewhere
    // we would set stats.estimatedFeatureCount and stats.geometryType (if metaOptions.geometryType) from it.

    // First stage: promises of queries that are started right away.
    const statQueries = [];
    // Second stage: functions returning a promise. They are only invoked after all of
    // statQueries have completed, so they can rely on anything stored in stats by then.
    const statQueries2 = [];

    if (stats.estimatedFeatureCount === undefined) {
        // NOTE(review): relies on queryUtils.getQueryRowEstimation being available — confirm.
        statQueries.push(
            queryPromise(dbConnection, queryUtils.getQueryRowEstimation(query), function (err, res) {
                // We decided that the relation is 1 row == 1 feature.
                // Best effort: a failed estimation is reported as -1 instead of failing.
                // TODO: at least for debugging we should err
                stats.estimatedFeatureCount = err ? -1 : res.rows[0].rows;
                return null;
            })
        );
    }

    if (metaOptions.featureCount) {
        // TODO: if metaOptions.columnStats we can combine this with column stats query
        statQueries.push(
            queryPromise(dbConnection, queryUtils.getQueryActualRowCount(rawQuery), function (err, res) {
                stats.featureCount = err ? -1 : res.rows[0].rows;
                return err;
            })
        );
    }

    if (metaOptions.sample) {
        statQueries2.push(function () {
            // Counts are only known now that the first stage has finished.
            const numRows = Number(
                stats.featureCount === undefined ? stats.estimatedFeatureCount : stats.featureCount
            );
            if (!(numRows > 0)) {
                // Empty result or unknown count (-1): no meaningful probability can be derived.
                stats.sample = [];
                return Promise.resolve();
            }
            const sampleProb = Math.min(metaOptions.sample / numRows, 1);
            return queryPromise(
                dbConnection,
                queryUtils.getQuerySample(rawQuery, sampleProb),
                function (err, res) {
                    stats.sample = err ? [] : res.rows;
                    return err;
                }
            );
        });
    }

    if (metaOptions.geometryType && stats.geometryType === undefined) {
        const geometryColumn = AggregationMapConfig.getAggregationGeometryColumn();
        statQueries.push(
            queryPromise(
                dbConnection,
                queryUtils.getQueryGeometryType(rawQuery, geometryColumn),
                function (err, res) {
                    // The query returns at most one row; none if there are no non-null geometries.
                    if (!err && res.rows.length > 0) {
                        stats.geometryType = res.rows[0].geom_type;
                    }
                    return err;
                }
            )
        );
    }

    if (metaOptions.columns || metaOptions.columnStats) {
        // TODO: note we have getLayerColumns in aggregation mapconfig.
        // and also getLayerAggregationColumns which either uses getLayerColumns or derives columns from parameters
        statQueries.push(
            queryPromise(dbConnection, queryUtils.getQueryLimited(rawQuery, 0), function (err, res) {
                if (err) {
                    return err;
                }

                // NOTE(review): the code below assumes res.fields maps column name => { type };
                // confirm against what dbConnection.query actually returns.
                stats.columns = res.fields;

                if (metaOptions.columnStats) {
                    const topN = metaOptions.columnStats.topCategories || 1024;
                    const aggr = [];

                    Object.keys(stats.columns).forEach(name => {
                        const column = quoteIdentifier(name);

                        columnAggregations(stats.columns[name]).forEach(fn => {
                            aggr.push(`${fn}(${column}) AS ${quoteIdentifier(`${name}_${fn}`)}`);
                        });

                        if (stats.columns[name].type === 'string') {
                            // TODO: metaOptions.columnStats.maxCategories => use PG stats to dismiss
                            // columns with more distinct values
                            statQueries2.push(() => queryPromise(
                                dbConnection,
                                queryUtils.getQueryTopCategories(rawQuery, column, topN),
                                function (err, res) {
                                    if (!err) {
                                        stats.columns[name].categories = res.rows;
                                    }
                                    return err;
                                }
                            ));
                        }
                    });

                    if (aggr.length > 0) {
                        statQueries2.push(() => queryPromise(
                            dbConnection,
                            `SELECT ${aggr.join(',')} FROM (${rawQuery}) AS __cdb_query`,
                            function (err, res) {
                                if (!err) {
                                    Object.keys(stats.columns).forEach(name => {
                                        columnAggregations(stats.columns[name]).forEach(fn => {
                                            stats.columns[name][fn] = res.rows[0][`${name}_${fn}`];
                                        });
                                    });
                                }
                                return err;
                            }
                        ));
                    }
                }

                return null;
            })
        );
    }

    Promise.all(statQueries)
        .then(() => Promise.all(statQueries2.map(startQuery => startQuery())))
        // Two-argument then: an exception thrown by callback itself must not
        // trigger a second callback invocation through a trailing catch.
        .then(() => callback(null, stats), err => callback(err));
};
|
||||
|
||||
module.exports = MapnikLayerStats;
|
||||
|
@ -71,3 +71,84 @@ module.exports.countNaNs = function countNaNs(ctx) {
|
||||
`sum(CASE WHEN (${ctx.column} = 'NaN'::float) THEN 1 ELSE 0 END)`
|
||||
}`;
|
||||
};
|
||||
|
||||
/**
 * Builds a query returning the most frequent values of a column.
 *
 * The resulting rows have the columns `category` (the value) and `frequency`
 * (its number of occurrences), ordered by descending frequency.
 *
 * `column` and `topN` are interpolated verbatim into the SQL text, so the caller
 * is responsible for passing a valid (quoted if needed) identifier and a number.
 *
 * @param {string} query - SQL to wrap as a subquery
 * @param {string} column - column whose values are counted
 * @param {number} topN - maximum number of categories to return
 * @param {boolean} [includeNulls=false] - whether NULL counts as a category
 * @returns {string} SQL text
 */
module.exports.getQueryTopCategories = function(query, column, topN, includeNulls=false) {
    const where = includeNulls ? '' : `WHERE ${column} IS NOT NULL`;
    return `
        SELECT ${column} AS category, COUNT(*) AS frequency
        FROM (${query}) AS __cdb_query
        ${where}
        GROUP BY ${column} ORDER BY 2 DESC
        LIMIT ${topN}
    `;
};
|
||||
|
||||
/**
 * Builds a query that counts the rows produced by `query`.
 *
 * @param {string} query - SQL to wrap as a subquery
 * @returns {string} SQL yielding one row whose `rows` column holds the count
 */
module.exports.getQueryActualRowCount = function (query) {
    // Must be a template literal (backticks): within plain quotes
    // the ${query} placeholder would be emitted literally.
    return `select COUNT(*) AS rows FROM (${query}) AS __cdb_query`;
};
|
||||
|
||||
|
||||
/**
 * Builds a query returning a random sample of the rows of `query`.
 *
 * When `query` is a simple `SELECT ... FROM table` (as detected by simpleQueryTable)
 * the table is sampled directly through getTableSample; otherwise the query is
 * wrapped and rows are kept when `random() < sampleProb`.
 *
 * @param {string} query - SQL to sample from
 * @param {number} sampleProb - probability of keeping each row
 * @param {number} [randomSeed=0.5] - seed passed to setseed() / getTableSample
 * @returns {string} SQL text
 */
module.exports.getQuerySample = function(query, sampleProb, randomSeed = 0.5) {
    const table = simpleQueryTable(query);
    if (table) {
        // getTableSample is only defined as an export, not as a local binding.
        return module.exports.getTableSample(table, sampleProb, randomSeed);
    }
    // NOTE(review): PostgreSQL may not evaluate a SELECT CTE that the main query never
    // references, in which case setseed() would not run — confirm the seed takes effect.
    return `
        WITH __cdb_rndseed AS (
            SELECT setseed(${randomSeed})
        )
        SELECT *
        FROM (${query}) AS __cdb_query
        WHERE random() < ${sampleProb}
    `;
};
|
||||
|
||||
/**
 * Builds a query sampling rows directly from a table with TABLESAMPLE BERNOULLI.
 *
 * @param {string} table - table name (optionally schema-qualified), interpolated verbatim
 * @param {number} sampleProb - sampling probability; multiplied by 100 to obtain
 *     the percentage passed to BERNOULLI
 * @param {number} [randomSeed=0.5] - seed; scaled by 2^31 - 1 before being passed
 *     to REPEATABLE. The default matches the one used by getQuerySample.
 * @returns {string} SQL text
 */
module.exports.getTableSample = function(table, sampleProb, randomSeed = 0.5) {
    const samplePercentage = sampleProb * 100;
    const repeatableSeed = randomSeed * (Math.pow(2, 31) - 1);
    return `
        SELECT * FROM ${table} TABLESAMPLE BERNOULLI (${samplePercentage}) REPEATABLE (${repeatableSeed})
    `;
};
|
||||
|
||||
/**
 * Detects queries that simply select columns from a single table.
 *
 * Recognized forms (case-insensitive):
 *  - `SELECT <columns> FROM [schema.]table[;]`
 *  - the same query wrapped as
 *    `SELECT * FROM (<query>) AS wrapped_query WHERE <digits>=1`
 *
 * @param {string} sql - query to inspect
 * @returns {string|boolean} the table name as written in the query (schema-qualified
 *     when a schema is present, double quotes preserved), or `false` if the query
 *     does not have one of the recognized forms
 */
function simpleQueryTable(sql) {
    const basicQuery =
        /\s*SELECT\s+[\*a-z0-9_,\s]+?\s+FROM\s+((\"[^"]+\"|[a-z0-9_]+)\.)?(\"[^"]+\"|[a-z0-9_]+)\s*;?\s*/i;
    const unwrappedQuery = new RegExp("^"+basicQuery.source+"$", 'i');
    // queries for named maps are wrapped like this:
    const wrappedQuery = new RegExp(
        "^\\s*SELECT\\s+\\*\\s+FROM\\s+\\(" +
        basicQuery.source +
        "\\)\\s+AS\\s+wrapped_query\\s+WHERE\\s+\\d+=1\\s*$",
        'i'
    );

    const match = sql.match(unwrappedQuery) || sql.match(wrappedQuery);
    if (!match) {
        return false;
    }

    // Neither wrapper adds capturing groups before the basic query,
    // so group 2 is the schema and group 3 the table in both forms.
    const schema = match[2];
    const table = match[3];
    return schema ? `${schema}.${table}` : table;
}
|
||||
|
||||
/**
 * Builds a query that reports the geometry type of the first row of `query`
 * having a non-null geometry.
 *
 * @param {string} query - SQL to wrap as a subquery
 * @param {string} geometryColumn - name of the geometry column, interpolated verbatim
 * @returns {string} SQL yielding at most one row with a `geom_type` column
 */
module.exports.getQueryGeometryType = (query, geometryColumn) => {
    const geometryTypeSql = `
        SELECT ST_GeometryType(${geometryColumn}) AS geom_type
        FROM (${query}) AS __cdb_query
        WHERE ${geometryColumn} IS NOT NULL
        LIMIT 1
    `;
    return geometryTypeSql;
};
|
||||
|
||||
/**
 * Wraps `query` so that it returns at most `limit` rows.
 *
 * @param {string} query - SQL to wrap as a subquery
 * @param {number} [limit=0] - value of the LIMIT clause, interpolated verbatim
 * @returns {string} SQL text
 */
module.exports.getQueryLimited = (query, limit = 0) => {
    const limitedSql = `
        SELECT *
        FROM (${query}) AS __cdb_query
        LIMIT ${limit}
    `;
    return limitedSql;
};
|
||||
|
Loading…
Reference in New Issue
Block a user