Refactor time dimensions
This commit is contained in:
parent
aff55351ad
commit
c588d4139e
@ -122,10 +122,11 @@ const timeDimensionParameters = definition => {
|
||||
// definition.column should correspond to a wrapped date column
|
||||
return {
|
||||
time: `to_timestamp("${definition.column}")`,
|
||||
timeZone: definition.timezone || 'utc',
|
||||
groupBy: definition.group_by,
|
||||
groupByCount: definition.group_by_count || 1,
|
||||
starting: definition.starting
|
||||
timezone: definition.timezone || 'utc',
|
||||
grouping: definition.group_by,
|
||||
count: definition.group_by_count || 1,
|
||||
starting: definition.starting,
|
||||
format: definition.format
|
||||
};
|
||||
};
|
||||
|
||||
|
@ -1,3 +1,48 @@
|
||||
/**
 * Builds the SQL literal that `timeExpression` passes as the zone argument
 * of `timezone(zone, t)`.
 *
 * Timezones can be defined either by a numeric offset in seconds or by
 * a valid (case-insensitive) tz/PG name; names include abbreviations defined
 * by PG (which have precedence and are fixed offsets, not handling DST) or
 * general names that can handle DST.
 *
 * @param {number|string} tz - offset in seconds (finite number or numeric
 *     string) or a time zone name.
 * @returns {string} `INTERVAL '<n> seconds'` for numeric offsets, otherwise
 *     the name as a single-quoted SQL string literal.
 */
function timezone(tz) {
    // The global isFinite() coerces '', ' ', null and true to numbers, which
    // used to produce broken literals such as INTERVAL ' seconds'; only real
    // numbers and non-blank numeric strings count as offsets here.
    const isNumber = typeof tz === 'number' && Number.isFinite(tz);
    const isNumericString = typeof tz === 'string' && tz.trim() !== '' && Number.isFinite(Number(tz));
    if (isNumber || isNumericString) {
        return `INTERVAL '${Number(tz)} seconds'`;
    }
    // Double embedded single quotes so the name cannot close the literal.
    // NOTE(review): assumes standard_conforming_strings is on (backslashes
    // are then not escape characters) - confirm for the target servers.
    return `'${String(tz).replace(/'/g, "''")}'`;
}
|
||||
|
||||
/**
 * Wraps a time expression so that it is evaluated in the requested time zone.
 *
 * `t` is assumed to be a TIMESTAMP WITH TIME ZONE. A TIMESTAMP or TIME value
 * (no time zone) should first be converted with `timezone('utc', t)` to a
 * type with time zone.
 * Note that by default CARTO uses timestamp with time zone columns for dates
 * and VectorMapConfigAdapter converts them to epoch numbers, so when used
 * with aggregations that rely on dates & times converted to UTC UNIX epoch
 * numbers, `to_timestamp` must be applied to the (converted) column.
 *
 * @param {string} t - SQL expression for the time value.
 * @param {number|string} [tz] - time zone (offset in seconds or name).
 * @returns {string} `timezone(<zone>, <t>)`, or `t` itself when `tz` is undefined.
 */
function timeExpression(t, tz) {
    const hasZone = tz !== undefined;
    return hasZone ? `timezone(${timezone(tz)}, ${t})` : t;
}
|
||||
|
||||
/**
 * Builds a `TIMESTAMP '...'` literal (timestamp without time zone) from a
 * possibly partial ISO-like epoch string.
 *
 * The epoch has no time zone of its own (it is interpreted as in the defined
 * timezone for the input time).
 * It can be partial, e.g. 'YYYY', 'YYYY-MM', 'YYYY-MM-DDTHH', etc.
 * Defaults are applied to missing parts: YYYY=0001, MM=01, DD=01, HH=00,
 * MI=00, SS=00.
 * Date dashes, the 'T'/space separator and time colons are all optional, so
 * both 'YYYY-MM-DDTHH:MI:SS' and the previously required 'YYYY-MM-DDTHHMI:SS'
 * form are accepted. A missing epoch (undefined/null) or text that does not
 * match the format yields the all-defaults timestamp.
 *
 * @param {string} [epoch] - full or partial ISO timestamp without time zone.
 * @returns {string} SQL literal of the form `TIMESTAMP 'YYYY-MM-DDTHH:MI:SS'`.
 */
function epochExpression(epoch) {
    /* jshint maxcomplexity:8 */ // one `||` default per date/time part keeps this flat
    const format = /^(\d{4})(?:-?(\d\d)(?:-?(\d\d)(?:[T\s]?(\d\d)(?::?(\d\d)(?::?(\d\d))?)?)?)?)?$/;
    // String() makes undefined/null simply fail to match (falling back to the
    // defaults) instead of throwing on `.match`, without adding a branch.
    const match = String(epoch).match(format) || [];
    const year = match[1] || '0001';
    const month = match[2] || '01';
    const day = match[3] || '01';
    const hour = match[4] || '00';
    const minute = match[5] || '00';
    const second = match[6] || '00';
    // Only regex-captured digits reach the literal, so no quoting is needed.
    return `TIMESTAMP '${year}-${month}-${day}T${hour}:${minute}:${second}'`;
}
|
||||
|
||||
const YEARSPAN = "(date_part('year', $t)-date_part('year', $epoch))";
|
||||
// Note that SECONDSPAN is not a UTC epoch, but an epoch in the specified TZ,
|
||||
// so we can use it to compute any multiple of seconds with it without using date_part or date_trunc
|
||||
@ -32,25 +77,47 @@ const serialParts = {
|
||||
sql: `1 + date_part('quarter', $t) - date_part('quarter', $epoch) + 4*${YEARSPAN}`,
|
||||
zeroBased: false
|
||||
},
|
||||
semester: {
|
||||
sql: `1 + FLOOR((date_part('month', $t) - date_part('month', $epoch))/6) + 2*${YEARSPAN}`,
|
||||
zeroBased: false
|
||||
},
|
||||
trimester: {
|
||||
sql: `1 + FLOOR((date_part('month', $t) - date_part('month', $epoch))/4) + 3*${YEARSPAN}`,
|
||||
zeroBased: false
|
||||
},
|
||||
year: {
|
||||
// TODO: isn't it more meaningful to ignore the epoch here and return date_part('year', $t)?
|
||||
// for the default epoch this coincides with date_part('year', $t)
|
||||
sql: `1 + ${YEARSPAN}`,
|
||||
zeroBased: false
|
||||
},
|
||||
decade: {
|
||||
// for the default epoch this coincides with date_part('decade', $t)
|
||||
sql: `FLOOR((${YEARSPAN} + 1)/10)`,
|
||||
zeroBased: true
|
||||
},
|
||||
century: {
|
||||
// for the default epoch this coincides with date_part('century', $t)
|
||||
sql: `1 + FLOOR(${YEARSPAN}/100)`,
|
||||
zeroBased: false
|
||||
},
|
||||
millennium: {
|
||||
// for the default epoch this coincides with date_part('millennium', $t)
|
||||
sql: `1 + FLOOR(${YEARSPAN}/1000)`,
|
||||
zeroBased: false
|
||||
}
|
||||
};
|
||||
|
||||
function serialSqlExpr(time, timeZone, groupBy, count = 1, starting = undefined) {
|
||||
[groupBy, count] = serialNormalize(groupBy, count);
|
||||
let { sql, zeroBased } = serialParts[groupBy];
|
||||
const column = timeExpression(time, timeZone);
|
||||
const epoch = epochExpression(starting);
|
||||
function serialSqlExpr(params) {
|
||||
const { sql, zeroBased } = serialParts[params.grouping];
|
||||
const column = timeExpression(params.time, params.timezone);
|
||||
const epoch = epochExpression(params.starting);
|
||||
const serial = sql.replace(/\$t/g, column).replace(/\$epoch/g, epoch);
|
||||
let expr = serial;
|
||||
if (count !== 1) {
|
||||
if (params.count !== 1) {
|
||||
if (zeroBased) {
|
||||
expr = `FLOOR((${expr})/(${count}::double precision))::int`;
|
||||
expr = `FLOOR((${expr})/(${params.count}::double precision))::int`;
|
||||
} else {
|
||||
expr = `CEIL((${expr})/(${count}::double precision))::int`;
|
||||
expr = `CEIL((${expr})/(${params.count}::double precision))::int`;
|
||||
}
|
||||
} else {
|
||||
expr = `(${expr})::int`;
|
||||
@ -71,184 +138,141 @@ const isoParts = {
|
||||
trimester: `to_char($t, 'YYYY"t"') || to_char(CEIL(date_part('month', $t)/4), '9')`,
|
||||
decade: `to_char(date_part('decade', $t), '"D"999')`,
|
||||
century: `to_char($t, '"C"CC')`,
|
||||
millennium: `to_char(date_part('millenium', $t), '"M"999')`
|
||||
millennium: `to_char(date_part('millennium', $t), '"M"999')`
|
||||
};
|
||||
|
||||
function isoSqlExpr(time, timeZone, groupBy, count = 1) {
|
||||
const column = timeExpression(time, timeZone);
|
||||
if (count > 1) {
|
||||
// TODO: it would be sensible to return the ISO of the firt unit in the period
|
||||
function isoSqlExpr(params) {
|
||||
const column = timeExpression(params.time, params.timezone);
|
||||
if (params.count > 1) {
|
||||
// TODO: it would be sensible to return the ISO of the first unit in the period
|
||||
throw new Error('Multiple time units not supported for ISO format');
|
||||
}
|
||||
return isoParts[groupBy].replace(/\$t/g, column);
|
||||
return isoParts[params.grouping].replace(/\$t/g, column);
|
||||
}
|
||||
|
||||
function serialNormalize(groupBy, count) {
|
||||
if (groupBy === 'semester') {
|
||||
groupBy = 'month';
|
||||
count *= 6;
|
||||
} else if (groupBy === 'trimester') {
|
||||
groupBy = 'month';
|
||||
count *= 4;
|
||||
} else if (groupBy === 'decade') {
|
||||
groupBy = 'year';
|
||||
count *= 10;
|
||||
} else if (groupBy === 'century') {
|
||||
groupBy = 'year';
|
||||
count *= 100;
|
||||
} else if (groupBy === 'millenium') {
|
||||
groupBy = 'year';
|
||||
count *= 1000;
|
||||
const cyclicParts = {
|
||||
dayOfWeek: `date_part('isodow', $t)`, // 1 = monday to 7 = sunday;
|
||||
dayOfMonth: `date_part('day', $t)`, // 1 to 31
|
||||
dayOfYear: `date_part('doy', $t)`, // 1 to 366
|
||||
hourOfDay: `date_part('hour', $t)`, // 0 to 23
|
||||
monthOfYear: `date_part('month', $t)`, // 1 to 12
|
||||
quarterOfYear: `date_part('quarter', $t)`, // 1 to 4
|
||||
semesterOfYear: `FLOOR((date_part('month', $t)-1)/6.0) + 1`, // 1 to 2
|
||||
trimesterOfYear: `FLOOR((date_part('month', $t)-1)/4.0) + 1`, // 1 to 3
|
||||
weekOfYear: `date_part('week', $t)`, // 1 to 53
|
||||
minuteOfHour: `date_part('minute', $t)` // 0 to 59
|
||||
};
|
||||
|
||||
/**
 * Builds the SQL expression for a cyclic time grouping (dayOfWeek,
 * monthOfYear, ...), i.e. the position of the time inside a repeating period.
 *
 * @param {Object} params - time dimension parameters.
 * @param {string} params.time - SQL expression for the time value.
 * @param {number|string} [params.timezone] - time zone to evaluate the time in.
 * @param {string} params.grouping - a key of `cyclicParts`.
 * @returns {string} the `cyclicParts` template with `$t` replaced by the time expression.
 */
function cyclicSqlExpr(params) {
    const column = timeExpression(params.time, params.timezone);
    // Cyclic groupings live in cyclicParts; looking them up in isoParts
    // yielded undefined and crashed on `.replace`.
    // A function replacer keeps any `$` in the column expression literal
    // (a string replacement would interpret sequences such as `$&`).
    return cyclicParts[params.grouping].replace(/\$t/g, () => column);
}
|
||||
|
||||
const ACCEPTED_PARAMETERS = ['time', 'grouping', 'timezone', 'count', 'starting', 'format'];
|
||||
const REQUIRED_PARAMETERS = ['time', 'grouping'];
|
||||
|
||||
function validateParameters(params, checker) {
|
||||
const errors = [];
|
||||
const presentParams = Object.keys(params);
|
||||
const invalidParams = presentParams.filter(param => !ACCEPTED_PARAMETERS.includes(param));
|
||||
if (invalidParams.length) {
|
||||
errors.push(`Invalid parameters: ${invalidParams.join(', ')}`);
|
||||
}
|
||||
return [groupBy, count];
|
||||
}
|
||||
|
||||
function cyclicNormalize(groupBy, count) {
|
||||
if (groupBy === 'monthOfYear' && count === 3) {
|
||||
groupBy = 'quarterOfYear';
|
||||
count = 1;
|
||||
} else if (groupBy === 'monthOfYear' && count === 6) {
|
||||
groupBy = 'semesterOfYear';
|
||||
count = 1;
|
||||
} else if (groupBy === 'monthOfYear' && count === 4) {
|
||||
groupBy = 'trimesterOfYear';
|
||||
count = 1;
|
||||
const missingParams = REQUIRED_PARAMETERS.filter(param => !presentParams.includes(param));
|
||||
if (missingParams.length) {
|
||||
errors.push(`Missing parameters: ${missingParams.join(', ')}`);
|
||||
}
|
||||
if (count !== 1) {
|
||||
throw new Error(`invalid multiplicity ${count} for cyclic ${groupBy}`);
|
||||
errors.push(...checker(params));
|
||||
if (errors.length) {
|
||||
throw new Error(`Invalid time dimension:\n${errors.join("\n")}`);
|
||||
}
|
||||
return [groupBy, count];
|
||||
}
|
||||
|
||||
/**
 * Builds the SQL literal that `timeExpression` passes as the zone argument
 * of `timezone(zone, t)`.
 *
 * Timezones can be defined either by a numeric offset in seconds or by
 * a valid (case-insensitive) tz/PG name; names include abbreviations defined
 * by PG (which have precedence and are fixed offsets, not handling DST) or
 * general names that can handle DST.
 *
 * @param {number|string} tz - offset in seconds (finite number or numeric
 *     string) or a time zone name.
 * @returns {string} `INTERVAL '<n> seconds'` for numeric offsets, otherwise
 *     the name as a single-quoted SQL string literal.
 */
function timezone(tz) {
    // The global isFinite() coerces '', ' ', null and true to numbers, which
    // used to produce broken literals such as INTERVAL ' seconds'; only real
    // numbers and non-blank numeric strings count as offsets here.
    const isNumber = typeof tz === 'number' && Number.isFinite(tz);
    const isNumericString = typeof tz === 'string' && tz.trim() !== '' && Number.isFinite(Number(tz));
    if (isNumber || isNumericString) {
        return `INTERVAL '${Number(tz)} seconds'`;
    }
    // Double embedded single quotes so the name cannot close the literal.
    // NOTE(review): assumes standard_conforming_strings is on (backslashes
    // are then not escape characters) - confirm for the target servers.
    return `'${String(tz).replace(/'/g, "''")}'`;
}
|
||||
const VALID_CYCLIC_GROUPINGS = Object.keys(cyclicParts);
|
||||
const VALID_SERIAL_GROUPINGS = Object.keys(serialParts);
|
||||
const VALID_ISO_GROUPINGS = Object.keys(isoParts);
|
||||
|
||||
/**
 * Wraps a time expression so that it is evaluated in the requested time zone.
 *
 * `t` is assumed to be a TIMESTAMP WITH TIME ZONE. A TIMESTAMP or TIME value
 * (no time zone) should first be converted with `timezone('utc', t)` to a
 * type with time zone.
 * Note that by default CARTO uses timestamp with time zone columns for dates
 * and VectorMapConfigAdapter converts them to epoch numbers, so when used
 * with aggregations that rely on dates & times converted to UTC UNIX epoch
 * numbers, `to_timestamp` must be applied to the (converted) column.
 *
 * @param {string} t - SQL expression for the time value.
 * @param {number|string} [tz] - time zone (offset in seconds or name).
 * @returns {string} `timezone(<zone>, <t>)`, or `t` itself when `tz` is undefined.
 */
function timeExpression(t, tz) {
    const hasZone = tz !== undefined;
    return hasZone ? `timezone(${timezone(tz)}, ${t})` : t;
}
|
||||
const MONTH_GROUPING = {
|
||||
3: 'quarterOfYear',
|
||||
6: 'semesterOfYear',
|
||||
4: 'trimesterOfYear'
|
||||
};
|
||||
|
||||
/**
 * Builds a `TIMESTAMP '...'` literal (timestamp without time zone) from a
 * possibly partial ISO-like epoch string.
 *
 * The epoch has no time zone of its own (it is interpreted as in the defined
 * timezone for the input time).
 * It can be partial, e.g. 'YYYY', 'YYYY-MM', 'YYYY-MM-DDTHH', etc.
 * Defaults are applied to missing parts: YYYY=0001, MM=01, DD=01, HH=00,
 * MI=00, SS=00.
 * Date dashes, the 'T'/space separator and time colons are all optional, so
 * both 'YYYY-MM-DDTHH:MI:SS' and the previously required 'YYYY-MM-DDTHHMI:SS'
 * form are accepted. A missing epoch (undefined/null) or text that does not
 * match the format yields the all-defaults timestamp.
 *
 * @param {string} [epoch] - full or partial ISO timestamp without time zone.
 * @returns {string} SQL literal of the form `TIMESTAMP 'YYYY-MM-DDTHH:MI:SS'`.
 */
function epochExpression(epoch) {
    /* jshint maxcomplexity:8 */ // one `||` default per date/time part keeps this flat
    const format = /^(\d{4})(?:-?(\d\d)(?:-?(\d\d)(?:[T\s]?(\d\d)(?::?(\d\d)(?::?(\d\d))?)?)?)?)?$/;
    // String() makes undefined/null simply fail to match (falling back to the
    // defaults) instead of throwing on `.match`, without adding a branch.
    const match = String(epoch).match(format) || [];
    const year = match[1] || '0001';
    const month = match[2] || '01';
    const day = match[3] || '01';
    const hour = match[4] || '00';
    const minute = match[5] || '00';
    const second = match[6] || '00';
    // Only regex-captured digits reach the literal, so no quoting is needed.
    return `TIMESTAMP '${year}-${month}-${day}T${hour}:${minute}:${second}'`;
}
|
||||
|
||||
function cyclicSqlExpr(time, timeZone, groupBy, count = 1) {
|
||||
[groupBy, count] = cyclicNormalize(groupBy, count);
|
||||
const column = timeExpression(time, timeZone);
|
||||
|
||||
if (count === 1) {
|
||||
switch (groupBy) {
|
||||
case 'dayOfWeek':
|
||||
// 1 = monday; 7 = sunday;
|
||||
return `date_part('isodow', ${column})`;
|
||||
|
||||
case 'dayOfMonth':
|
||||
// result: 1-31
|
||||
return `date_part('day', ${column})`;
|
||||
|
||||
case 'dayOfYear':
|
||||
// result: 1-366
|
||||
return `date_part('doy', ${column})`;
|
||||
|
||||
case 'hourOfDay':
|
||||
// result: 0-23
|
||||
return `date_part('hour', ${column})`;
|
||||
|
||||
case 'monthOfYear':
|
||||
// result 1-12
|
||||
return `date_part('month', ${column})`;
|
||||
|
||||
case 'quarterOfYear':
|
||||
// result 1-4
|
||||
return `date_part('quarter', ${column})`;
|
||||
|
||||
case 'semesterOfYear':
|
||||
// result 1-2
|
||||
return `FLOOR((date_part('month', ${column})-1)/6.0) + 1`;
|
||||
|
||||
case 'trimesterOfYear':
|
||||
// result 1-3
|
||||
return `FLOOR((date_part('month', ${column})-1)/4.0) + 1`;
|
||||
|
||||
case 'weekOfYear':
|
||||
// result 1-53
|
||||
return `date_part('week', ${column})`;
|
||||
|
||||
case 'minuteOfHour':
|
||||
// result 0-59
|
||||
return `date_part('minute', ${column})`;
|
||||
function cyclicCheckParams(params) {
|
||||
const errors = [];
|
||||
if (!VALID_CYCLIC_GROUPINGS.includes(params.grouping)) {
|
||||
errors.push(`Invalid grouping "${params.grouping}"`);
|
||||
} else {
|
||||
if (params.count && params.count > 1) {
|
||||
let fixed = false;
|
||||
if (params.grouping === 'monthOfYear') {
|
||||
const grouping = MONTH_GROUPING[params.count];
|
||||
if (grouping) {
|
||||
params.grouping = grouping;
|
||||
params.count = 1;
|
||||
fixed = true;
|
||||
}
|
||||
}
|
||||
if (!fixed) {
|
||||
errors.push(`Invalid count ${params.count} for cyclic ${params.grouping}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
throw new Error(`Invalid cyclic time grouping ${groupBy} with count ${count}`)
|
||||
return errors;
|
||||
}
|
||||
|
||||
function validateParameters(_params) {
|
||||
return true;
|
||||
/**
 * Validates the parameters of a serial time dimension.
 *
 * @param {Object} params - time dimension parameters; only `grouping` is inspected.
 * @returns {string[]} error messages; empty when `grouping` is one of
 *     VALID_SERIAL_GROUPINGS.
 */
function serialCheckParams(params) {
    const { grouping } = params;
    return VALID_SERIAL_GROUPINGS.includes(grouping) ? [] : [`Invalid grouping "${grouping}"`];
}
|
||||
|
||||
/**
 * Validates the parameters of an ISO-formatted time dimension.
 *
 * @param {Object} params - time dimension parameters; `grouping` and
 *     `starting` are inspected.
 * @returns {string[]} error messages: one when `grouping` is not in
 *     VALID_ISO_GROUPINGS and one when a truthy `starting` is given;
 *     empty when both checks pass.
 */
function isoCheckParams(params) {
    const { grouping, starting } = params;
    const problems = [];
    const knownGrouping = VALID_ISO_GROUPINGS.includes(grouping);
    if (!knownGrouping) {
        problems.push(`Invalid grouping "${grouping}"`);
    }
    if (starting) {
        problems.push("Parameter 'starting' not supported for ISO format");
    }
    return problems;
}
|
||||
|
||||
const CLASSIFIERS = {
|
||||
cyclic: {
|
||||
sqlExpr: cyclicSqlExpr,
|
||||
checkParams: cyclicCheckParams
|
||||
},
|
||||
iso: {
|
||||
sqlExpr: isoSqlExpr,
|
||||
checkParams: isoCheckParams
|
||||
},
|
||||
serial: {
|
||||
sqlExpr: serialSqlExpr,
|
||||
checkParams: serialCheckParams
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * Tells whether a grouping name denotes a cyclic grouping, i.e. one named
 * `<unit>Of<period>` such as 'dayOfWeek' or 'monthOfYear'.
 *
 * @param {string} groupBy - grouping name.
 * @returns {?Array} the regex match (truthy) for cyclic names, `null` otherwise.
 */
function isCyclic(groupBy) {
    // Cyclic groupings are spelled with "Of"; the former /.+By.+/ pattern
    // matched none of them, so they were never classified as cyclic.
    return groupBy.match(/.+Of.+/);
}
|
||||
|
||||
function classificationSql(params) {
|
||||
validateParameters(params);
|
||||
if (isCyclic(params.group_by)) {
|
||||
// TODO: validate group_by_count === 1, No epoch
|
||||
return cyclicSqlExpr(
|
||||
params.time,
|
||||
params.timeZone,
|
||||
params.groupBy,
|
||||
params.groupByCount
|
||||
);
|
||||
function classifierFor(params) {
|
||||
let classifier = 'serial';
|
||||
if (params.grouping && isCyclic(params.grouping)) {
|
||||
classifier = 'cyclic';
|
||||
} else if (params.format === 'iso') {
|
||||
// TODO: validate group_by_count === 1, No epoch
|
||||
return isoSqlExpr(
|
||||
params.time,
|
||||
params.timeZone,
|
||||
params.groupBy,
|
||||
params.groupByCount
|
||||
);
|
||||
} else {
|
||||
return serialSqlExpr(
|
||||
params.time,
|
||||
params.timeZone,
|
||||
params.groupBy,
|
||||
params.groupByCount,
|
||||
params.starting
|
||||
);
|
||||
|
||||
classifier = 'iso';
|
||||
}
|
||||
return CLASSIFIERS[classifier];
|
||||
}
|
||||
module.exports = classificationSql;
|
||||
|
||||
/**
 * Produces the SQL classification expression for a time dimension.
 *
 * Selects the classifier for the parameters via `classifierFor`, validates
 * the parameters with that classifier's checker (through
 * `validateParameters`), and returns the classifier's SQL expression.
 *
 * @param {Object} params - time dimension parameters.
 * @returns {string} the value returned by the selected classifier's `sqlExpr`.
 */
function classificationSql(params) {
    const { checkParams, sqlExpr } = classifierFor(params);
    validateParameters(params, checkParams);
    return sqlExpr(params);
}
|
||||
|
||||
module.exports = classificationSql;
|
Loading…
Reference in New Issue
Block a user