Merge pull request #968 from CartoDB/sample-columns

Sample columns
This commit is contained in:
Javier Goizueta 2018-05-28 17:51:38 +02:00 committed by GitHub
commit 20fe9c45cf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 58 additions and 9 deletions

View File

@ -146,13 +146,20 @@ function mergeColumns(results) {
}
}
// Seed handed to the sample query so that repeated requests draw the same rows.
const SAMPLE_SEED = 0.5;
// Number of rows sampled when the request does not specify `sample.num_rows`.
const DEFAULT_SAMPLE_ROWS = 100;

/**
 * Fetch a random sample of the layer rows when `ctx.metaOptions.sample` is set.
 *
 * Recognised `ctx.metaOptions.sample` properties:
 *   - `num_rows`: approximate number of rows wanted (defaults to DEFAULT_SAMPLE_ROWS).
 *   - `include_columns`: columns to return; forwarded untouched to
 *     `queryUtils.getQuerySample`.
 *
 * @param {Object} ctx - context providing `metaOptions` and `dbConnection`
 *   (it is also handed to `_getSQL`; its other fields are not used here).
 * @param {number} numRows - (estimated) total number of rows of the layer query.
 * @returns {Promise} resolves to `{ sample: rows }`, or to `undefined` when no
 *   sample was requested.
 */
function _sample(ctx, numRows) {
    const sampleOptions = ctx.metaOptions.sample;
    if (!sampleOptions) {
        return Promise.resolve();
    }

    // Resolve the row count first so that the probability and the limit agree;
    // deriving the probability from a missing `num_rows` would yield NaN.
    const requestedRows = sampleOptions.num_rows || DEFAULT_SAMPLE_ROWS;

    // A non-positive or non-numeric row count cannot produce a usable probability:
    // select every row in that case and let the limit below bound the result.
    const sampleProb = numRows > 0 ? Math.min(requestedRows / numRows, 1) : 1;

    // We'll use a safety limit just in case numRows is a bad estimate
    const limit = Math.ceil(requestedRows * 1.5);
    const columns = sampleOptions.include_columns;

    return queryPromise(ctx.dbConnection, _getSQL(
        ctx,
        sql => queryUtils.getQuerySample(sql, sampleProb, limit, SAMPLE_SEED, columns)
    )).then(res => ({ sample: res.rows }));
}
@ -265,6 +272,8 @@ function (layer, dbConnection, callback) {
// (if metaOptions.geometryType) from it.
// TODO: compute _sample with _featureCount when available
// TODO: support a sample.exclude option; this requires forcing the columns query and
// passing its results to the sample query function.
Promise.all([
_estimatedFeatureCount(ctx).then(

View File

@ -88,17 +88,30 @@ module.exports.getQueryTopCategories = function(query, column, topN, includeNull
`;
};
module.exports.getQuerySample = function(query, sampleProb, limit = null, randomSeed = 0.5) {
/**
 * Build the column list of a SELECT clause.
 *
 * @param {string|Array|null|undefined} columns
 *   - falsy (null, undefined, empty string): select every column (`*`);
 *   - string: used verbatim as the column list;
 *   - array: each entry is emitted as a double-quoted identifier and the
 *     entries are joined with `, `.
 * @returns {string} text to place between SELECT and FROM.
 * @throws {TypeError} when `columns` is truthy but neither a string nor an array.
 */
function columnSelector(columns) {
    if (!columns) {
        return '*';
    }
    if (typeof columns === 'string') {
        return columns;
    }
    if (Array.isArray(columns)) {
        // Double any embedded quote: inside a quoted identifier `""` stands for a
        // literal `"`, so a name can never terminate its own quoting.
        return columns.map(name => `"${String(name).replace(/"/g, '""')}"`).join(', ');
    }
    throw new TypeError(`Bad argument type for columns: ${typeof columns}`);
}
module.exports.getQuerySample = function(query, sampleProb, limit = null, randomSeed = 0.5, columns = null) {
const singleTable = simpleQueryTable(query);
if (singleTable) {
return getTableSample(singleTable.table, singleTable.columns, sampleProb, limit, randomSeed);
return getTableSample(singleTable.table, columns || singleTable.columns, sampleProb, limit, randomSeed);
}
const limitClause = limit ? `LIMIT ${limit}` : '';
return `
WITH __cdb_rndseed AS (
SELECT setseed(${randomSeed})
)
SELECT *
SELECT ${columnSelector(columns)}
FROM (${query}) AS __cdb_query
WHERE random() < ${sampleProb}
${limitClause}
@ -110,7 +123,9 @@ function getTableSample(table, columns, sampleProb, limit = null, randomSeed = 0
sampleProb *= 100;
randomSeed *= Math.pow(2, 31) -1;
return `
SELECT ${columns} FROM ${table} TABLESAMPLE BERNOULLI (${sampleProb}) REPEATABLE (${randomSeed}) ${limitClause}
SELECT ${columnSelector(columns)}
FROM ${table}
TABLESAMPLE BERNOULLI (${sampleProb}) REPEATABLE (${randomSeed}) ${limitClause}
`;
}

View File

@ -513,7 +513,7 @@ describe('Create mapnik layergroup', function() {
version: '1.4.0',
layers: [
layerWithMetadata(mapnikLayer4, {
sample: 3
sample: { num_rows: 3 }
})
]
});
@ -529,6 +529,31 @@ describe('Create mapnik layergroup', function() {
});
});
it('can specify sample columns', function(done) {
    // Request a small sample restricted to an explicit list of columns.
    const sampleColumns = [ 'cartodb_id', 'address', 'the_geom' ];
    const layer = layerWithMetadata(mapnikLayer4, {
        sample: {
            num_rows: 3,
            include_columns: sampleColumns
        }
    });
    const testClient = new TestClient({ version: '1.4.0', layers: [ layer ] });

    testClient.getLayergroup(function(err, layergroup) {
        assert.ifError(err);

        const layerMeta = layergroup.metadata.layers[0];
        assert.equal(layerMeta.id, mapnikBasicLayerId(0));

        const stats = layerMeta.meta.stats;
        assert.equal(stats.estimatedFeatureCount, 5);
        assert(stats.sample.length > 0);

        // The first sampled row must expose exactly the requested columns.
        const returnedCols = Object.keys(stats.sample[0]).sort();
        assert.deepEqual(returnedCols, sampleColumns.slice().sort());

        testClient.drain(done);
    });
});
it('should only provide requested optional metadata', function(done) {
var testClient = new TestClient({
version: '1.4.0',