// Windshaft-cartodb/lib/models/aggregation/time-dimension.js

'use strict';
// Timezones can be defined either by a numeric offset in seconds or by
// a valid (case-insensitive) tz/PG name;
// they include abbreviations defined by PG (which have precedence and
// are fixed offsets, not handling DST) or general names that can handle DST.
//
// Returns the SQL fragment for the zone: an INTERVAL literal for numeric
// offsets, a quoted string literal for names.
function timezone (tz) {
    // Only real numbers and non-blank numeric strings are offsets. The global
    // isFinite() coerces its argument, so it would also accept '', ' ', null,
    // true or [] and produce things like `INTERVAL ' seconds'`.
    const isNumericOffset = typeof tz === 'number'
        ? Number.isFinite(tz)
        : typeof tz === 'string' && tz.trim() !== '' && Number.isFinite(Number(tz));
    if (isNumericOffset) {
        return `INTERVAL '${tz} seconds'`;
    }
    // Double single quotes so the name cannot terminate the SQL string literal.
    return `'${String(tz).replace(/'/g, "''")}'`;
}
// We assume t is a TIMESTAMP WITH TIME ZONE.
// If this was to be used with a t which is a TIMESTAMP or TIME (no time zone)
// it should be converted with `timezone('utc',t)` to a type with time zone.
// Note that by default CARTO uses timestamp with time zone columns for dates
// and VectorMapConfigAdapter converts them to epoch numbers.
// So, for using this with aggregations, relying on dates & times
// converted to UTC UNIX epoch numbers, apply `to_timestamp` to the
// (converted) column.
//
// Returns `t` wrapped in a `timezone(zone, t)` call when a timezone is
// given, or `t` unchanged when it is not.
function timeExpression (t, tz) {
    // Both undefined and null mean "no timezone"; forwarding null would
    // produce an invalid zone expression.
    if (tz === undefined || tz === null) {
        return t;
    }
    return `timezone(${timezone(tz)}, ${t})`;
}
// Completes a possibly partial epoch ('YYYY', 'YYYY-MM', 'YYYY-MM-DDTHH',
// 'YYYY-MM-DD HH:MI:SS', ...) into a full 'YYYY-MM-DDTHH:MI:SS' literal.
// The '-' and ':' separators and the 'T'/space before the hour are optional.
// Missing parts default to YYYY=0001, MM=01, DD=01, HH=00, MI=00, SS=00.
// Input that does not match the format at all yields the full default epoch.
function epochWithDefaults (epoch) {
    // The ':' before minutes and before seconds is optional, so both
    // 'HH:MI:SS' and the compact 'HHMISS' forms are accepted.
    const format = /^(\d{4})(?:-?(\d\d)(?:-?(\d\d)(?:[T\s]?(\d\d)(?::?(\d\d)(?::?(\d\d))?)?)?)?)?$/;
    // Stringify so a numeric year such as 2018 does not throw on .match().
    const text = (epoch === undefined || epoch === null) ? '' : String(epoch);
    // Groups that did not participate in the match are undefined, which
    // triggers the destructuring defaults.
    const [
        ,
        year = '0001',
        month = '01',
        day = '01',
        hour = '00',
        minute = '00',
        second = '00'
    ] = text.match(format) || [];
    return `${year}-${month}-${day}T${hour}:${minute}:${second}`;
}
// Epoch should be an ISO timestamp literal without time zone
// (it is interpreted as in the defined timezone for the input time).
// Partial values ('YYYY', 'YYYY-MM', 'YYYY-MM-DDTHH', etc.) are completed by
// `epochWithDefaults` during parameter checking, with the defaults
// YYYY=0001, MM=01, DD=01, HH=00, MI=00, SS=00.
// It returns a timestamp without time zone
function epochExpression (epoch) {
    // Double single quotes so the value cannot break out of the SQL literal.
    const literal = String(epoch).replace(/'/g, "''");
    return `TIMESTAMP '${literal}'`;
}
// SQL templates for serial (ever-increasing) time classification.
// `$t` stands for the time expression and `$epoch` for the starting epoch;
// both are substituted when the final expression is built.

const YEARSPAN = "(date_part('year', $t)-date_part('year', $epoch))";

// Note that SECONDSPAN is not a UTC epoch, but an epoch in the specified TZ,
// so we can use it to compute any multiple of seconds with it without using date_part or date_trunc
const SECONDSPAN = "(date_part('epoch', $t) - date_part('epoch', $epoch))";

// For each unit:
//   sql       - template giving the period number of $t relative to $epoch
//   zeroBased - true when the period containing the epoch is numbered 0,
//               false when it is numbered 1
const serialParts = {
    second: {
        sql: `FLOOR(${SECONDSPAN})`,
        zeroBased: true
    },
    minute: {
        sql: `FLOOR(${SECONDSPAN}/60)`,
        zeroBased: true
    },
    hour: {
        sql: `FLOOR(${SECONDSPAN}/3600)`,
        zeroBased: true
    },
    day: {
        sql: `1 + FLOOR(${SECONDSPAN}/86400)`,
        zeroBased: false
    },
    week: {
        sql: `1 + FLOOR(${SECONDSPAN}/(7*86400))`,
        zeroBased: false
    },
    month: {
        sql: `1 + date_part('month', $t) - date_part('month', $epoch) + 12*${YEARSPAN}`,
        zeroBased: false
    },
    quarter: {
        sql: `1 + date_part('quarter', $t) - date_part('quarter', $epoch) + 4*${YEARSPAN}`,
        zeroBased: false
    },
    semester: {
        sql: `1 + FLOOR((date_part('month', $t) - date_part('month', $epoch))/6) + 2*${YEARSPAN}`,
        zeroBased: false
    },
    trimester: {
        sql: `1 + FLOOR((date_part('month', $t) - date_part('month', $epoch))/4) + 3*${YEARSPAN}`,
        zeroBased: false
    },
    year: {
        // for the default epoch this coincides with date_part('year', $t)
        sql: `1 + ${YEARSPAN}`,
        zeroBased: false
    },
    decade: {
        // for the default epoch this coincides with date_part('decade', $t)
        sql: `FLOOR((${YEARSPAN} + 1)/10)`,
        zeroBased: true
    },
    century: {
        // for the default epoch this coincides with date_part('century', $t)
        sql: `1 + FLOOR(${YEARSPAN}/100)`,
        zeroBased: false
    },
    millennium: {
        // for the default epoch this coincides with date_part('millennium', $t)
        sql: `1 + FLOOR(${YEARSPAN}/1000)`,
        zeroBased: false
    }
};
// Builds the SQL expression that numbers the period (of `params.count`
// `params.units`) containing `params.time`, counted from `params.starting`.
// The result is cast to int.
function serialSqlExpr (params) {
    const { sql, zeroBased } = serialParts[params.units];
    const substitutions = {
        $t: timeExpression(params.time, params.timezone),
        $epoch: epochExpression(params.starting)
    };
    // Single pass with a replacer function: the substituted text is never
    // re-scanned for placeholders, and `$&`, `$1`, ... inside a column or
    // epoch are not expanded as replacement patterns.
    const serial = sql.replace(/\$(?:epoch|t)/g, placeholder => substitutions[placeholder]);

    // A missing count means one unit per period.
    const count = (params.count === undefined || params.count === null) ? 1 : params.count;
    if (count === 1) {
        return `(${serial})::int`;
    }
    // Zero-based serials group as 0..count-1 -> 0 (FLOOR);
    // one-based serials group as 1..count -> 1 (CEIL).
    const rounding = zeroBased ? 'FLOOR' : 'CEIL';
    return `${rounding}((${serial})/(${count}::double precision))::int`;
}
// SQL templates producing an ISO-like text label for each unit.
// `$t` stands for the time expression.
// NOTE(review): PostgreSQL's numeric to_char normally reserves a leading
// position for the sign unless the template uses the FM prefix, so the
// semester/trimester/decade/millennium labels may contain a blank
// (e.g. 'S 1') - confirm against the expected output before changing.
const isoParts = {
    second: `to_char($t, 'YYYY-MM-DD"T"HH24:MI:SS')`,
    minute: `to_char($t, 'YYYY-MM-DD"T"HH24:MI')`,
    hour: `to_char($t, 'YYYY-MM-DD"T"HH24')`,
    day: `to_char($t, 'YYYY-MM-DD')`,
    month: `to_char($t, 'YYYY-MM')`,
    year: `to_char($t, 'YYYY')`,
    week: `to_char($t, 'IYYY-"W"IW')`,
    quarter: `to_char($t, 'YYYY-"Q"Q')`,
    semester: `to_char($t, 'YYYY"S"') || to_char(CEIL(date_part('month', $t)/6), '9')`,
    trimester: `to_char($t, 'YYYY"t"') || to_char(CEIL(date_part('month', $t)/4), '9')`,
    decade: `to_char(date_part('decade', $t), '"D"999')`,
    century: `to_char($t, '"C"CC')`,
    millennium: `to_char(date_part('millennium', $t), '"M"999')`
};
// Builds the SQL expression giving the ISO-like label of `params.time`
// for `params.units`.
// Throws an Error when more than one unit per period is requested.
function isoSqlExpr (params) {
    if (params.count > 1) {
        // TODO: it would be sensible to return the ISO of the first unit in the period
        throw new Error('Multiple time units not supported for ISO format');
    }
    const column = timeExpression(params.time, params.timezone);
    // Replacer function: `$&`, `$1`, ... inside the column are kept literally.
    return isoParts[params.units].replace(/\$t/g, () => column);
}
// SQL templates for cyclic (repeating) time classification.
// `$t` stands for the time expression.
const cyclicParts = {
    dayOfWeek: `date_part('isodow', $t)`, // 1 = monday to 7 = sunday;
    dayOfMonth: `date_part('day', $t)`, // 1 to 31
    dayOfYear: `date_part('doy', $t)`, // 1 to 366
    hourOfDay: `date_part('hour', $t)`, // 0 to 23
    monthOfYear: `date_part('month', $t)`, // 1 to 12
    quarterOfYear: `date_part('quarter', $t)`, // 1 to 4
    semesterOfYear: `FLOOR((date_part('month', $t)-1)/6.0) + 1`, // 1 to 2
    trimesterOfYear: `FLOOR((date_part('month', $t)-1)/4.0) + 1`, // 1 to 3
    weekOfYear: `date_part('week', $t)`, // 1 to 53
    minuteOfHour: `date_part('minute', $t)` // 0 to 59
};
// Builds the SQL expression giving the cyclic part (`params.units`, e.g.
// 'dayOfWeek') of `params.time`, optionally converted to `params.timezone`.
function cyclicSqlExpr (params) {
    const column = timeExpression(params.time, params.timezone);
    // Replacer function: `$&`, `$1`, ... inside the column are kept literally.
    return cyclicParts[params.units].replace(/\$t/g, () => column);
}
const ACCEPTED_PARAMETERS = ['time', 'units', 'timezone', 'count', 'starting', 'format'];
const REQUIRED_PARAMETERS = ['time', 'units'];

// Validates time dimension parameters.
// - params: object with the parameters; its own keys must all be accepted
//   names and must include every required name.
// - checker: function receiving `params` and returning `{ errors, params }`,
//   where `errors` is an array of messages and `params` the (possibly
//   adjusted) parameters.
// Returns the parameters returned by the checker.
// Throws an Error listing every problem found, one per line.
function validateParameters (params, checker) {
    const errors = [];
    const presentParams = Object.keys(params);

    const invalidParams = presentParams.filter(name => !ACCEPTED_PARAMETERS.includes(name));
    if (invalidParams.length) {
        errors.push(`Invalid parameters: ${invalidParams.join(', ')}`);
    }

    const missingParams = REQUIRED_PARAMETERS.filter(name => !presentParams.includes(name));
    if (missingParams.length) {
        errors.push(`Missing parameters: ${missingParams.join(', ')}`);
    }

    // The checker always runs, so its errors are reported together with the
    // generic ones above.
    const checked = checker(params);
    errors.push(...checked.errors);

    if (errors.length) {
        throw new Error(`Invalid time dimension:\n${errors.join('\n')}`);
    }
    return checked.params;
}
// Unit names accepted by each kind of classification, taken from the keys
// of the corresponding SQL template tables.
const VALID_CYCLIC_UNITS = Object.keys(cyclicParts);
const VALID_SERIAL_UNITS = Object.keys(serialParts);
const VALID_ISO_UNITS = Object.keys(isoParts);

// Checks the parameters of a cyclic classification.
// Returns `{ errors, params }`: the list of error messages found and the
// parameters, unchanged.
function cyclicCheckParams (params) {
    const errors = [];
    if (!VALID_CYCLIC_UNITS.includes(params.units)) {
        errors.push(`Invalid units "${params.units}"`);
    }
    // A missing count compares as false here, so no separate presence test is needed.
    if (params.count > 1) {
        errors.push(`Count ${params.count} not supported for cyclic ${params.units}`);
    }
    return { errors, params };
}
// Checks the parameters of a serial classification.
// Returns `{ errors, params }`: the list of error messages found and a copy
// of the parameters whose `starting` epoch has been completed with defaults.
function serialCheckParams (params) {
    const errors = [];
    if (!VALID_SERIAL_UNITS.includes(params.units)) {
        errors.push(`Invalid grouping units "${params.units}"`);
    }

    // count is interpolated into the generated SQL as a divisor, so when
    // present it must be a positive finite number (or a string spelling one).
    const { count } = params;
    if (count !== undefined && count !== null) {
        const isNumberLike = typeof count === 'number' || typeof count === 'string';
        const value = Number(count);
        if (!isNumberLike || !Number.isFinite(value) || value <= 0) {
            errors.push(`Invalid count "${count}"`);
        }
    }

    const effectiveParams = Object.assign({}, params, { starting: epochWithDefaults(params.starting) });
    return { errors, params: effectiveParams };
}
// Checks the parameters of an ISO-formatted classification.
// Returns `{ errors, params }`: the list of error messages found and the
// parameters, unchanged.
function isoCheckParams (params) {
    const errors = [];
    if (!VALID_ISO_UNITS.includes(params.units)) {
        errors.push(`Invalid units "${params.units}"`);
    }
    if (params.starting) {
        errors.push("Parameter 'starting' not supported for ISO format");
    }
    return { errors, params };
}
// For each kind of classification: the function that builds its SQL
// expression and the function that checks its parameters.
const CLASSIFIERS = {
    cyclic: { sqlExpr: cyclicSqlExpr, checkParams: cyclicCheckParams },
    iso: { sqlExpr: isoSqlExpr, checkParams: isoCheckParams },
    serial: { sqlExpr: serialSqlExpr, checkParams: serialCheckParams }
};
// Tells whether `units` names a cyclic unit (e.g. 'dayOfWeek').
function isCyclic (units) {
    return VALID_CYCLIC_UNITS.includes(units);
}
// Chooses the classifier for the given parameters:
// cyclic units take precedence, then the 'iso' format; serial is the default.
function classifierFor (params) {
    if (isCyclic(params.units)) {
        return CLASSIFIERS.cyclic;
    }
    if (params.format === 'iso') {
        return CLASSIFIERS.iso;
    }
    return CLASSIFIERS.serial;
}

// Builds the SQL expression that classifies a time column.
// - params: time dimension parameters (time, units and optionally timezone,
//   count, starting, format).
// Returns `{ sql, effectiveParams }`: the SQL expression and the parameters
// actually used to build it (as returned by the parameter check).
// Throws an Error when the parameters are not valid.
function classificationSql (params) {
    const classifier = classifierFor(params);
    const effectiveParams = validateParameters(params, classifier.checkParams);
    return { sql: classifier.sqlExpr(effectiveParams), effectiveParams };
}

module.exports = classificationSql;