From fbcfc7a5821362e0e903a8b24239ac95a97a34ad Mon Sep 17 00:00:00 2001 From: Javier Goizueta Date: Thu, 20 Sep 2018 21:12:54 +0200 Subject: [PATCH] WIP: time dimensions for aggregation --- .../models/aggregation/aggregation-query.js | 96 +++++- .../models/aggregation/time-dimension.js | 278 ++++++++++++++++++ test/acceptance/aggregation.js | 40 +++ 3 files changed, 412 insertions(+), 2 deletions(-) create mode 100644 lib/cartodb/models/aggregation/time-dimension.js diff --git a/lib/cartodb/models/aggregation/aggregation-query.js b/lib/cartodb/models/aggregation/aggregation-query.js index a455e02a..84216e91 100644 --- a/lib/cartodb/models/aggregation/aggregation-query.js +++ b/lib/cartodb/models/aggregation/aggregation-query.js @@ -1,5 +1,8 @@ +const timeDimension = require('./time-dimension'); + const DEFAULT_PLACEMENT = 'point-sample'; + /** * Returns a template function (function that accepts template parameters and returns a string) * to generate an aggregation query. @@ -113,6 +116,95 @@ const aggregateColumnDefs = ctx => { const aggregateDimensions = ctx => ctx.dimensions || {}; +const timeDimensionParameters = definition => { + let group_by_count = definition.step || 1; + let group_by_units; + let group_by_cycle; + switch (definition.group_by) { + case 'second': + case 'minute': + case 'hour': + case 'day': + case 'week': + case 'month': + case 'quarter': + case 'year': + case 'century': + case 'millenium': + group_by_units = definition.group_by; + break; + case 'semester': + group_by_units = 'month'; + group_by_count *= 6; + break; + case 'trimester': + group_by_units = 'month'; + group_by_count *= 4; + break; + case 'minuteOfHour': + group_by_units = 'minute'; + group_by_cycle = 'hour'; + break; + case 'hourOfDay': + group_by_units = 'hour'; + group_by_cycle = 'day'; + break; + case 'dayOfWeek': + group_by_units = 'day'; + group_by_cycle = 'week'; + break; + case 'dayOfMonth': + group_by_units = 'day'; + group_by_cycle = 'month'; + break; + case 'dayOfYear': + group_by_units = 'day'; + group_by_cycle = 'year'; + break; + case 'weekOfYear': + group_by_units = 'week'; + group_by_cycle = 'year'; + break; + case 'monthOfYear': + group_by_units = 'month'; + group_by_cycle = 'year'; + break; + case 'quarterOfYear': + group_by_units = 'quarter'; + group_by_cycle = 'year'; + break; + case 'trimesterOfYear': + group_by_units = 'month'; + group_by_count *= 4; + group_by_cycle = 'year'; + break; + case 'semesterOfYear': + group_by_units = 'month'; + group_by_count *= 6; + group_by_cycle = 'year'; + break; + default: + throw new Error(`Invalid time grouping ${definition.group_by}`); + } + // definition.column should correspond to a wrapped date column + return { + time: `to_timestamp("${definition.column}")`, + timezone: definition.timezone || 'utc', + granularity: group_by_units, + multiplicity: group_by_count || 1, + cycle: group_by_cycle, + offset: definition.offset || 0 + }; +}; + +const dimensionExpression = definition => { + if (typeof(definition) === 'string') { + return `"definition"`; + } + // Currently only time dimensions are supported with parameters + return timeDimension(timeDimensionParameters(definition)); +}; + const dimensionNames = (ctx, table) => { let dimensions = aggregateDimensions(ctx); if (table) { @@ -128,8 +220,8 @@ const dimensionNames = (ctx, table) => { const dimensionDefs = ctx => { let dimensions = aggregateDimensions(ctx); return sep(Object.keys(dimensions).map(dimension_name => { - const expression = dimensions[dimension_name]; - return `"${expression}" AS "${dimension_name}"`; + const expression = dimensionExpression(dimensions[dimension_name]); + return `${expression} AS "${dimension_name}"`; })); }; diff --git a/lib/cartodb/models/aggregation/time-dimension.js b/lib/cartodb/models/aggregation/time-dimension.js new file mode 100644 index 00000000..16eb468c --- /dev/null +++ b/lib/cartodb/models/aggregation/time-dimension.js @@ -0,0 +1,278 @@ +const MONTH_SECONDS = 365.2425 / 12 * 24 * 3600 // PG intervals use 30 * 24 * 3600 +const YEAR_SECONDS = 12 * MONTH_SECONDS; + +// time unit durations +const usecs = { + second: 1, + minute: 60, + hour: 3600, + day: 24 * 3600, + week: 7 * 24 * 3600, + month: MONTH_SECONDS, + year: YEAR_SECONDS, + + quarter: 3 * MONTH_SECONDS, + semester: 6 * MONTH_SECONDS, + trimester: 4 * MONTH_SECONDS, + decade: 12 * YEAR_SECONDS, + century: 100 * YEAR_SECONDS, + millennium: 1000 * YEAR_SECONDS +}; + +serialParts = { + second: { + sql: `FLOOR(date_part('epoch', $t))`, + zeroBased: true + }, + minute: { + sql: `date_part('epoch', date_trunc('day', $t))/60`, + zeroBased: true + }, + hour: { + sql: `date_part('epoch', date_trunc('day', $t))/(60*60)`, + zeroBased: true + }, + day: { + sql: `date_part('epoch', date_trunc('day', $t))/(24*60*60) + 1`, + zeroBased: false + }, + week: { + sql: `date_part('epoch', date_trunc('week', $t))/(7*24*60*60) + 1`, + zeroBaseed: false + }, + month: { + sql: `date_part('month', $t) + 12*(date_part('year', $t)-date_part('year', to_timestamp(0.0)))`, + zeroBased: false + }, + quarter: { + sql: `date_part('quarter', $t) + 4*(date_part('year', $t)-date_part('year', to_timestamp(0.0)))`, + zeroBased: false + }, + year: { + sql: `date_part('year', $t)-date_part('year', to_timestamp(0.0))`, + zeroBased: false + } +}; + +function serialSqlExpr(t, tz, u, m = 1, u_offset = 0, m_offset = 0) { + [u, m, u_offset] = serialNormalize(u, m, u_offset); + let { sql, zeroBased } = serialParts[u]; + const column = timeExpression(t, tz); + const serial = sql.replace(/\$t/g, column); + let expr = serial; + if (u_offset !== 0) { + expr = `expr - ${u_offset}`; + } + if (m !== 1) { + if (zeroBased) { + expr = `FLOOR((${expr})/(${m}::double precision))::int`; + } else { + expr = `CEIL((${expr})/(${m}::double precision))::int`; + } + } else { + expr = `ROUND(${expr})::int`; + } + if (m_offset !== 0) { + expr = `(${expr} - 1)`; + } + return expr; +} + +function serialNormalize(u, m, u_offset) { + if (u === 'semester') { + u = 'month'; + m *= 6; + u_offset *= 6; + } else if (u === 'trimester') { + u = 'month'; + m *= 4; + u_offset *= 4; + } else if (u === 'decade') { + u = 'year'; + m *= 10; + u_offset *= 10 + } else if (u === 'century') { + u = 'year'; + m *= 100; + u_offset *= 100 + } else if (u === 'millenium') { + u = 'year'; + m *= 1000; + u_offset *= 1000 + } + return [u, m, u_offset]; +} + +function cyclicNormalize(u, m, c, c_offset) { + if (u === 'month' && m === 3) { + u = 'quarter'; + m = 1; + } else if (u === 'month' && m === 6) { + u = 'semester'; + m = 1; + } else if (u === 'month' && m === 4) { + u = 'trimester'; + m = 1; + } + if (m !== 1) { + throw new Error(`invalid multiplicity ${m} for cyclic ${u}`); + } + return [u, m, c, c_offset]; +} + +// timezones can be defined either by an numeric offset in seconds or by +// a valid (case-insensitive) tz/PG name; +// they include abbreviations defined by PG (which have precedence and +// are fixed offsets, not handling DST) or general names that can handle DST. +function timezone(tz) { + if (isFinite(tz)) { + return `INTERVAL '${tz} seconds'`; + } + return `'${tz}'` +} + +// We assume t is a TIMESTAMP WITH TIME ZONE. +// If this was to be used with a t which is a TIMESTAMP or TIME (no time zone) +// it should be converted with `timezone('utc',t)` to a type with time zone. +// Note that by default CARTO uses timestamp with time zone columns for dates +// and VectorMapConfigAdapter converts them to epoch numbers. +// So, for using this with aggregations, relying on dates & times +// converted to UTC UNIX epoch numbers, apply `to_timestamp` to the +// (converted) column. +function timeExpression(t, tz) { + if (tz !== undefined) { + return `timezone(${timezone(tz)}, ${t})` + } + return t; +} + +function cyclicSqlExpr(t, tz, u, c, c_offset = 0, m = 1) { + [u, m, c, c_offset] = cyclicNormalize(u, m, c, c_offset); + const comb = `${u}/${c}`; + const column = timeExpression(t, tz); + let expr; + + if (m === 1) { + switch (comb) { + case 'day/week': + // result: 0-6 + // c_offset = 0 => 0 = sunday; 1 => 0 = monday... + // let expr = `EXTRACT(DOW FROM ${column})`; + expr = `date_part('dow', ${column})`; + if (c_offset !== 0) { + expr = `(${expr} - ${c_offset}) % 7`; + } + return expr; + + // iso dow monday=1, no offset: + // `EXTRACT(ISODOW FROM ${column})` + // iso dow 1-6, offset 0 => 1 = mondayº + // expr = `date_part('dow, ${column})`; + // c_offset += 1; + // expr = `(${expr} - ${c_offset}) % 7 + 1`; + + case 'day/month': + // result: 1-31 + // c_offset not supported + return `date_part('day', ${column})`; + + case 'day/year': + // result: 1-366 + // c_offset not supported + return `date_part('doy', ${column})`; + + case 'hour/day': + // result: 0-23 + expr = `date_part('hour', ${column})`; + if (c_offset !== 0) { + expr = `(${expr} - ${c_offset}) % 24`; + } + return expr; + + case 'month/year': + // result 1-12 + expr = `date_part('month', ${column})`; + if (c_offset !== 0) { + expr = `((${expr} - ${c_offset} - 1) % 12) + 1`; + } + return expr; + + case 'quarter/year': + // result 1-4 + expr = `date_part('quarter', ${column})`; + if (c_offset !== 0) { + expr = `((${expr} - ${c_offset} - 1) % 4) + 1`; + } + return expr; + + case 'semester/year': + // result 1-2 + expr = `FLOOR((date_part('month', ${column})-1)/6.0) + 1`; + if (c_offset !== 0) { + expr = `((${expr} - ${c_offset} - 1) % 2) + 1`; + } + return expr; + + case 'trimester/year': + // result 1-3 + expr = `FLOOR((date_part('month', ${column})-1)/4.0) + 1`; + if (c_offset !== 0) { + expr = `((${expr} - ${c_offset} - 1) % 3) + 1`; + } + return expr; + + case 'week/year': + // result 1-52 + expr = `date_part('week', ${column})`; + if (c_offset !== 0) { + expr = `((${expr} - ${c_offset} - 1) % 52) + 1`; + } + return expr; + + case 'minute/hour': + // result 0-59 + expr = `date_part('minute', ${column})`; + if (c_offset !== 0) { + expr = `((${expr} - ${c_offset}) % 60)`; + } + return expr; + } + } + return genericCyclicSqlExpr(t, u, c, c_offset, m); +} + +function genericCyclicSqlExpr(t, tz, u, c, c_offset = 0, m = 1) { + const usec = usecs[u]; + const csec = usecs[c]; + const column = timeExpression(t, tz); + return `((FLOOR(date_part('epoch', ${column})/(${usec*m}))*(${usec*m})+${c_offset}) % ${csec})/${usec*m}`; +} + +function validateParameters(params) { + return true; +} + +function classificationSql(params) { + validateParameters(params); + if (params.cycle) { + return cyclicSqlExpr( + params.time, + params.timezone || 'utc', + params.granularity, + params.cycle, + params.offset || 0, + params.multiplicity || 1 + ); + } else { + return serialSqlExpr( + params.time, + params.timezone || 'utc', + params.granularity, + params.multiplicity || 1, + params.offset || 0, + 0 + ); + + } +} +module.exports = classificationSql; diff --git a/test/acceptance/aggregation.js b/test/acceptance/aggregation.js index 39fb7845..427983e4 100644 --- a/test/acceptance/aggregation.js +++ b/test/acceptance/aggregation.js @@ -878,6 +878,46 @@ describe('aggregation', function () { }); }); + it('time dimensions', + function (done) { + this.mapConfig = createVectorMapConfig([ + { + type: 'cartodb', + options: { + sql: POINTS_SQL_TIMESTAMP_1, + dates_as_numbers: true, + aggregation: { + threshold: 1, + dimensions: { + dow: { + column: 'date', + group_by: 'dayOfWeek' + } + } + } + } + } + ]); + + this.testClient = new TestClient(this.mapConfig); + const options = { + format: 'mvt' + }; + this.testClient.getTile(0, 0, 0, options, (err, res, tile) => { + if (err) { + return done(err); + } + + const tileJSON = tile.toJSON(); + + tileJSON[0].features.forEach(feature => assert.equal(typeof feature.properties.dow, 'number')); + + done(); + }); + }); + + + ['centroid', 'point-sample', 'point-grid'].forEach(placement => { it(`dimensions should work for ${placement} placement`, function(done) { this.mapConfig = createVectorMapConfig([