commit
20fe9c45cf
@ -146,13 +146,20 @@ function mergeColumns(results) {
|
||||
}
|
||||
}
|
||||
|
||||
// Fixed seed handed to the sample query builder so repeated requests use the same seed.
const SAMPLE_SEED = 0.5;
// Number of rows requested when `sample.num_rows` is not provided (or is falsy).
const DEFAULT_SAMPLE_ROWS = 100;

/**
 * Fetches a random sample of rows for the layer query when `ctx.metaOptions.sample` is set.
 *
 * Recognised `ctx.metaOptions.sample` properties:
 *   - `num_rows`: approximate number of rows wanted (falls back to DEFAULT_SAMPLE_ROWS).
 *   - `include_columns`: columns to return; forwarded as-is to `queryUtils.getQuerySample`.
 *
 * @param {Object} ctx - Context carrying `metaOptions` and `dbConnection`.
 * @param {number} numRows - Estimated number of rows produced by the layer query.
 * @returns {Promise} Resolves with `{ sample: rows }`, or with `undefined` when no sample
 *     was requested.
 */
function _sample(ctx, numRows) {
    const sampleOptions = ctx.metaOptions.sample;
    if (!sampleOptions) {
        return Promise.resolve();
    }

    // Apply the default *before* deriving the probability; otherwise a sample request
    // without `num_rows` would yield a NaN probability.
    const requestedRows = sampleOptions.num_rows || DEFAULT_SAMPLE_ROWS;

    // With a missing, zero or negative estimate keep every row and rely on the limit below.
    const sampleProb = numRows > 0 ? Math.min(requestedRows / numRows, 1) : 1;

    // We'll use a safety limit just in case numRows is a bad estimate
    const limit = Math.ceil(requestedRows * 1.5);

    const columns = sampleOptions.include_columns;

    return queryPromise(ctx.dbConnection, _getSQL(
        ctx,
        sql => queryUtils.getQuerySample(sql, sampleProb, limit, SAMPLE_SEED, columns)
    )).then(res => ({ sample: res.rows }));
}
|
||||
@ -265,6 +272,8 @@ function (layer, dbConnection, callback) {
|
||||
// (if metaOptions.geometryType) from it.
|
||||
|
||||
// TODO: compute _sample with _featureCount when available
|
||||
// TODO: support a sample.exclude option: when it is given, force the columns query and
|
||||
// pass its results to the sample query function.
|
||||
|
||||
Promise.all([
|
||||
_estimatedFeatureCount(ctx).then(
|
||||
|
@ -88,17 +88,30 @@ module.exports.getQueryTopCategories = function(query, column, topN, includeNull
|
||||
`;
|
||||
};
|
||||
|
||||
module.exports.getQuerySample = function(query, sampleProb, limit = null, randomSeed = 0.5) {
|
||||
/**
 * Builds the column list used in a SELECT clause.
 *
 * @param {string|string[]|null|undefined} columns
 *     - falsy: every column is selected (`*`);
 *     - string: used verbatim, so the caller is responsible for its contents;
 *     - array: each entry becomes a double-quoted identifier, joined with `, `.
 * @returns {string} The text to place between SELECT and FROM.
 * @throws {TypeError} When `columns` is truthy but neither a string nor an array.
 */
function columnSelector(columns) {
    if (!columns) {
        return '*';
    }
    if (typeof columns === 'string') {
        return columns;
    }
    if (Array.isArray(columns)) {
        // Double any embedded quote so a name cannot terminate the quoted identifier
        // and inject arbitrary SQL.
        return columns.map(name => `"${String(name).replace(/"/g, '""')}"`).join(', ');
    }
    throw new TypeError(`Bad argument type for columns: ${typeof columns}`);
}
|
||||
|
||||
module.exports.getQuerySample = function(query, sampleProb, limit = null, randomSeed = 0.5, columns = null) {
|
||||
const singleTable = simpleQueryTable(query);
|
||||
if (singleTable) {
|
||||
return getTableSample(singleTable.table, singleTable.columns, sampleProb, limit, randomSeed);
|
||||
return getTableSample(singleTable.table, columns || singleTable.columns, sampleProb, limit, randomSeed);
|
||||
}
|
||||
const limitClause = limit ? `LIMIT ${limit}` : '';
|
||||
return `
|
||||
WITH __cdb_rndseed AS (
|
||||
SELECT setseed(${randomSeed})
|
||||
)
|
||||
SELECT *
|
||||
SELECT ${columnSelector(columns)}
|
||||
FROM (${query}) AS __cdb_query
|
||||
WHERE random() < ${sampleProb}
|
||||
${limitClause}
|
||||
@ -110,7 +123,9 @@ function getTableSample(table, columns, sampleProb, limit = null, randomSeed = 0
|
||||
sampleProb *= 100;
|
||||
randomSeed *= Math.pow(2, 31) -1;
|
||||
return `
|
||||
SELECT ${columns} FROM ${table} TABLESAMPLE BERNOULLI (${sampleProb}) REPEATABLE (${randomSeed}) ${limitClause}
|
||||
SELECT ${columnSelector(columns)}
|
||||
FROM ${table}
|
||||
TABLESAMPLE BERNOULLI (${sampleProb}) REPEATABLE (${randomSeed}) ${limitClause}
|
||||
`;
|
||||
}
|
||||
|
||||
|
@ -513,7 +513,7 @@ describe('Create mapnik layergroup', function() {
|
||||
version: '1.4.0',
|
||||
layers: [
|
||||
layerWithMetadata(mapnikLayer4, {
|
||||
sample: 3
|
||||
sample: { num_rows: 3 }
|
||||
})
|
||||
]
|
||||
});
|
||||
@ -529,6 +529,31 @@ describe('Create mapnik layergroup', function() {
|
||||
});
|
||||
});
|
||||
|
||||
it('can specify sample columns', function(done) {
    // Ask for a 3-row sample restricted to an explicit list of columns.
    const requestedColumns = [ 'cartodb_id', 'address', 'the_geom' ];
    const expectedCols = requestedColumns.slice().sort();
    const sampleOptions = { num_rows: 3, include_columns: requestedColumns };

    const testClient = new TestClient({
        version: '1.4.0',
        layers: [ layerWithMetadata(mapnikLayer4, { sample: sampleOptions }) ]
    });

    testClient.getLayergroup(function(err, layergroup) {
        assert.ifError(err);

        const layerMeta = layergroup.metadata.layers[0];
        assert.equal(layerMeta.id, mapnikBasicLayerId(0));

        const stats = layerMeta.meta.stats;
        assert.equal(stats.estimatedFeatureCount, 5);
        assert(stats.sample.length > 0);

        // Every sampled row must expose exactly the requested columns.
        const sampledCols = Object.keys(stats.sample[0]).sort();
        assert.deepEqual(sampledCols, expectedCols);

        testClient.drain(done);
    });
});
|
||||
|
||||
|
||||
it('should only provide requested optional metadata', function(done) {
|
||||
var testClient = new TestClient({
|
||||
version: '1.4.0',
|
||||
|
Loading…
Reference in New Issue
Block a user