Refactor time dimensions
This commit is contained in:
parent
aff55351ad
commit
c588d4139e
@ -122,10 +122,11 @@ const timeDimensionParameters = definition => {
|
|||||||
// definition.column should correspond to a wrapped date column
|
// definition.column should correspond to a wrapped date column
|
||||||
return {
|
return {
|
||||||
time: `to_timestamp("${definition.column}")`,
|
time: `to_timestamp("${definition.column}")`,
|
||||||
timeZone: definition.timezone || 'utc',
|
timezone: definition.timezone || 'utc',
|
||||||
groupBy: definition.group_by,
|
grouping: definition.group_by,
|
||||||
groupByCount: definition.group_by_count || 1,
|
count: definition.group_by_count || 1,
|
||||||
starting: definition.starting
|
starting: definition.starting,
|
||||||
|
format: definition.format
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -1,3 +1,48 @@
|
|||||||
|
// timezones can be defined either by a numeric offset in seconds or by
|
||||||
|
// a valid (case-insensitive) tz/PG name;
|
||||||
|
// they include abbreviations defined by PG (which have precedence and
|
||||||
|
// are fixed offsets, not handling DST) or general names that can handle DST.
|
||||||
|
/**
 * Builds the SQL fragment used as the zone argument of `timezone(zone, t)`.
 *
 * @param {number|string} tz - either an offset in seconds (a number or a
 *   numeric string) or a time zone name/abbreviation.
 * @returns {string} `INTERVAL '<tz> seconds'` for numeric offsets, otherwise
 *   the name as a single-quoted SQL string literal.
 */
function timezone(tz) {
    // The global isFinite() coerces '', null and booleans to finite numbers,
    // which would produce a malformed INTERVAL literal; only accept real
    // numbers and non-blank numeric strings as offsets.
    const isNumber = typeof tz === 'number';
    const isNumericString = typeof tz === 'string' && tz.trim() !== '';
    if ((isNumber || isNumericString) && Number.isFinite(Number(tz))) {
        return `INTERVAL '${tz} seconds'`;
    }
    // The name ends up inside a quoted SQL literal: double any embedded
    // single quote so it cannot terminate the literal early.
    const escapedName = String(tz).replace(/'/g, "''");
    return `'${escapedName}'`;
}
|
||||||
|
|
||||||
|
// We assume t is a TIMESTAMP WITH TIME ZONE.
|
||||||
|
// If this was to be used with a t which is a TIMESTAMP or TIME (no time zone)
|
||||||
|
// it should be converted with `timezone('utc',t)` to a type with time zone.
|
||||||
|
// Note that by default CARTO uses timestamp with time zone columns for dates
|
||||||
|
// and VectorMapConfigAdapter converts them to epoch numbers.
|
||||||
|
// So, for using this with aggregations, relying on dates & times
|
||||||
|
// converted to UTC UNIX epoch numbers, apply `to_timestamp` to the
|
||||||
|
// (converted) column.
|
||||||
|
/**
 * Wraps a time SQL expression so that it is evaluated in a given time zone.
 *
 * @param {string} t - SQL expression for the time value.
 * @param {number|string} [tz] - time zone (offset in seconds or name).
 * @returns {string} `t` unchanged when no zone is given, otherwise
 *   `timezone(<zone>, <t>)`.
 */
function timeExpression(t, tz) {
    // Without a zone the expression is used as is.
    if (tz === undefined) {
        return t;
    }
    const zone = timezone(tz);
    return `timezone(${zone}, ${t})`;
}
|
||||||
|
|
||||||
|
// Epoch should be an ISO timestamp literal without time zone
|
||||||
|
// (it is interpreted as in the defined timezone for the input time)
|
||||||
|
// It can be partial, e.g. 'YYYY', 'YYYY-MM', 'YYYY-MM-DDTHH', etc.
|
||||||
|
// Defaults are applied: YYYY=0001, MM=01, DD=01, HH=00, MM=00, S=00
|
||||||
|
// It returns a timestamp without time zone
|
||||||
|
/**
 * Converts a (possibly partial) ISO-like timestamp into a SQL
 * `TIMESTAMP '...'` literal without time zone.
 *
 * Accepted shapes: 'YYYY', 'YYYY-MM', 'YYYY-MM-DD', 'YYYY-MM-DDTHH',
 * 'YYYY-MM-DDTHH:MM', 'YYYY-MM-DDTHH:MM:SS'; the '-' and ':' separators are
 * optional and the date/time separator may be 'T', whitespace or absent.
 * Missing parts default to year 0001, month 01, day 01, 00:00:00.
 * A missing (undefined/null) or unparseable epoch yields the default epoch
 * '0001-01-01T00:00:00'.
 *
 * @param {string} [epoch] - starting time, as described above.
 * @returns {string} SQL timestamp literal.
 */
function epochExpression(epoch) {
    // The colon before minutes and seconds is optional, so both
    // 'HH:MM:SS' and the compact 'HHMMSS' forms are recognized.
    const format = /^(\d\d\d\d)(?:-?(\d\d)(?:-?(\d\d)(?:[T\s]?(\d\d)(?::?(\d\d)(?::?(\d\d))?)?)?)?)?$/;
    // `starting` is optional for callers: treat a missing value as an empty
    // string so the defaults below apply instead of throwing.
    const text = (epoch === undefined || epoch === null) ? '' : String(epoch);
    const [
        ,
        year = '0001',
        month = '01',
        day = '01',
        hour = '00',
        minute = '00',
        second = '00'
    ] = text.match(format) || [];
    return `TIMESTAMP '${year}-${month}-${day}T${hour}:${minute}:${second}'`;
}
|
||||||
|
|
||||||
const YEARSPAN = "(date_part('year', $t)-date_part('year', $epoch))";
|
const YEARSPAN = "(date_part('year', $t)-date_part('year', $epoch))";
|
||||||
// Note that SECONDSPAN is not a UTC epoch, but an epoch in the specified TZ,
|
// Note that SECONDSPAN is not a UTC epoch, but an epoch in the specified TZ,
|
||||||
// so we can use it to compute any multiple of seconds with it without using date_part or date_trunc
|
// so we can use it to compute any multiple of seconds with it without using date_part or date_trunc
|
||||||
@ -32,25 +77,47 @@ const serialParts = {
|
|||||||
sql: `1 + date_part('quarter', $t) - date_part('quarter', $epoch) + 4*${YEARSPAN}`,
|
sql: `1 + date_part('quarter', $t) - date_part('quarter', $epoch) + 4*${YEARSPAN}`,
|
||||||
zeroBased: false
|
zeroBased: false
|
||||||
},
|
},
|
||||||
|
semester: {
|
||||||
|
sql: `1 + FLOOR((date_part('month', $t) - date_part('month', $epoch))/6) + 2*${YEARSPAN}`,
|
||||||
|
zeroBased: false
|
||||||
|
},
|
||||||
|
trimester: {
|
||||||
|
sql: `1 + FLOOR((date_part('month', $t) - date_part('month', $epoch))/4) + 3*${YEARSPAN}`,
|
||||||
|
zeroBased: false
|
||||||
|
},
|
||||||
year: {
|
year: {
|
||||||
// TODO: isn't more meaningful to ignore the epoch here and return date_part('year', $t)
|
// for the default epoch this coincides with date_part('year', $t)
|
||||||
sql: `1 + ${YEARSPAN}`,
|
sql: `1 + ${YEARSPAN}`,
|
||||||
zeroBased: false
|
zeroBased: false
|
||||||
|
},
|
||||||
|
decade: {
|
||||||
|
// for the default epoch this coincides with date_part('decade', $t)
|
||||||
|
sql: `FLOOR((${YEARSPAN} + 1)/10)`,
|
||||||
|
zeroBased: true
|
||||||
|
},
|
||||||
|
century: {
|
||||||
|
// for the default epoch this coincides with date_part('century', $t)
|
||||||
|
sql: `1 + FLOOR(${YEARSPAN}/100)`,
|
||||||
|
zeroBased: false
|
||||||
|
},
|
||||||
|
millennium: {
|
||||||
|
// for the default epoch this coincides with date_part('millennium', $t)
|
||||||
|
sql: `1 + FLOOR(${YEARSPAN}/1000)`,
|
||||||
|
zeroBased: false
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
function serialSqlExpr(time, timeZone, groupBy, count = 1, starting = undefined) {
|
function serialSqlExpr(params) {
|
||||||
[groupBy, count] = serialNormalize(groupBy, count);
|
const { sql, zeroBased } = serialParts[params.grouping];
|
||||||
let { sql, zeroBased } = serialParts[groupBy];
|
const column = timeExpression(params.time, params.timezone);
|
||||||
const column = timeExpression(time, timeZone);
|
const epoch = epochExpression(params.starting);
|
||||||
const epoch = epochExpression(starting);
|
|
||||||
const serial = sql.replace(/\$t/g, column).replace(/\$epoch/g, epoch);
|
const serial = sql.replace(/\$t/g, column).replace(/\$epoch/g, epoch);
|
||||||
let expr = serial;
|
let expr = serial;
|
||||||
if (count !== 1) {
|
if (params.count !== 1) {
|
||||||
if (zeroBased) {
|
if (zeroBased) {
|
||||||
expr = `FLOOR((${expr})/(${count}::double precision))::int`;
|
expr = `FLOOR((${expr})/(${params.count}::double precision))::int`;
|
||||||
} else {
|
} else {
|
||||||
expr = `CEIL((${expr})/(${count}::double precision))::int`;
|
expr = `CEIL((${expr})/(${params.count}::double precision))::int`;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
expr = `(${expr})::int`;
|
expr = `(${expr})::int`;
|
||||||
@ -71,184 +138,141 @@ const isoParts = {
|
|||||||
trimester: `to_char($t, 'YYYY"t"') || to_char(CEIL(date_part('month', $t)/4), '9')`,
|
trimester: `to_char($t, 'YYYY"t"') || to_char(CEIL(date_part('month', $t)/4), '9')`,
|
||||||
decade: `to_char(date_part('decade', $t), '"D"999')`,
|
decade: `to_char(date_part('decade', $t), '"D"999')`,
|
||||||
century: `to_char($t, '"C"CC')`,
|
century: `to_char($t, '"C"CC')`,
|
||||||
millennium: `to_char(date_part('millenium', $t), '"M"999')`
|
millennium: `to_char(date_part('millennium', $t), '"M"999')`
|
||||||
};
|
};
|
||||||
|
|
||||||
function isoSqlExpr(time, timeZone, groupBy, count = 1) {
|
function isoSqlExpr(params) {
|
||||||
const column = timeExpression(time, timeZone);
|
const column = timeExpression(params.time, params.timezone);
|
||||||
if (count > 1) {
|
if (params.count > 1) {
|
||||||
// TODO: it would be sensible to return the ISO of the firt unit in the period
|
// TODO: it would be sensible to return the ISO of the first unit in the period
|
||||||
throw new Error('Multiple time units not supported for ISO format');
|
throw new Error('Multiple time units not supported for ISO format');
|
||||||
}
|
}
|
||||||
return isoParts[groupBy].replace(/\$t/g, column);
|
return isoParts[params.grouping].replace(/\$t/g, column);
|
||||||
}
|
}
|
||||||
|
|
||||||
function serialNormalize(groupBy, count) {
|
const cyclicParts = {
|
||||||
if (groupBy === 'semester') {
|
dayOfWeek: `date_part('isodow', $t)`, // 1 = monday to 7 = sunday;
|
||||||
groupBy = 'month';
|
dayOfMonth: `date_part('day', $t)`, // 1 to 31
|
||||||
count *= 6;
|
dayOfYear: `date_part('doy', $t)`, // 1 to 366
|
||||||
} else if (groupBy === 'trimester') {
|
hourOfDay: `date_part('hour', $t)`, // 0 to 23
|
||||||
groupBy = 'month';
|
monthOfYear: `date_part('month', $t)`, // 1 to 12
|
||||||
count *= 4;
|
quarterOfYear: `date_part('quarter', $t)`, // 1 to 4
|
||||||
} else if (groupBy === 'decade') {
|
semesterOfYear: `FLOOR((date_part('month', $t)-1)/6.0) + 1`, // 1 to 2
|
||||||
groupBy = 'year';
|
trimesterOfYear: `FLOOR((date_part('month', $t)-1)/4.0) + 1`, // 1 to 3
|
||||||
count *= 10;
|
weekOfYear: `date_part('week', $t)`, // 1 to 53
|
||||||
} else if (groupBy === 'century') {
|
minuteOfHour: `date_part('minute', $t)` // 0 to 59
|
||||||
groupBy = 'year';
|
};
|
||||||
count *= 100;
|
|
||||||
} else if (groupBy === 'millenium') {
|
function cyclicSqlExpr(params) {
|
||||||
groupBy = 'year';
|
const column = timeExpression(params.time, params.timezone);
|
||||||
count *= 1000;
|
return isoParts[params.grouping].replace(/\$t/g, column);
|
||||||
}
|
|
||||||
return [groupBy, count];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function cyclicNormalize(groupBy, count) {
|
const ACCEPTED_PARAMETERS = ['time', 'grouping', 'timezone', 'count', 'starting', 'format'];
|
||||||
if (groupBy === 'monthOfYear' && count === 3) {
|
const REQUIRED_PARAMETERS = ['time', 'grouping'];
|
||||||
groupBy = 'quarterOfYear';
|
|
||||||
count = 1;
|
function validateParameters(params, checker) {
|
||||||
} else if (groupBy === 'monthOfYear' && count === 6) {
|
const errors = [];
|
||||||
groupBy = 'semesterOfYear';
|
const presentParams = Object.keys(params);
|
||||||
count = 1;
|
const invalidParams = presentParams.filter(param => !ACCEPTED_PARAMETERS.includes(param));
|
||||||
} else if (groupBy === 'monthOfYear' && count === 4) {
|
if (invalidParams.length) {
|
||||||
groupBy = 'trimesterOfYear';
|
errors.push(`Invalid parameters: ${invalidParams.join(', ')}`);
|
||||||
count = 1;
|
|
||||||
}
|
}
|
||||||
if (count !== 1) {
|
const missingParams = REQUIRED_PARAMETERS.filter(param => !presentParams.includes(param));
|
||||||
throw new Error(`invalid multiplicity ${count} for cyclic ${groupBy}`);
|
if (missingParams.length) {
|
||||||
|
errors.push(`Missing parameters: ${missingParams.join(', ')}`);
|
||||||
}
|
}
|
||||||
return [groupBy, count];
|
errors.push(...checker(params));
|
||||||
}
|
if (errors.length) {
|
||||||
|
throw new Error(`Invalid time dimension:\n${errors.join("\n")}`);
|
||||||
// timezones can be defined either by a numeric offset in seconds or by
|
|
||||||
// a valid (case-insensitive) tz/PG name;
|
|
||||||
// they include abbreviations defined by PG (which have precedence and
|
|
||||||
// are fixed offsets, not handling DST) or general names that can handle DST.
|
|
||||||
function timezone(tz) {
|
|
||||||
if (isFinite(tz)) {
|
|
||||||
return `INTERVAL '${tz} seconds'`;
|
|
||||||
}
|
|
||||||
return `'${tz}'`;
|
|
||||||
}
|
|
||||||
|
|
||||||
// We assume t is a TIMESTAMP WITH TIME ZONE.
|
|
||||||
// If this was to be used with a t which is a TIMESTAMP or TIME (no time zone)
|
|
||||||
// it should be converted with `timezone('utc',t)` to a type with time zone.
|
|
||||||
// Note that by default CARTO uses timestamp with time zone columns for dates
|
|
||||||
// and VectorMapConfigAdapter converts them to epoch numbers.
|
|
||||||
// So, for using this with aggregations, relying on dates & times
|
|
||||||
// converted to UTC UNIX epoch numbers, apply `to_timestamp` to the
|
|
||||||
// (converted) column.
|
|
||||||
function timeExpression(t, tz) {
|
|
||||||
if (tz !== undefined) {
|
|
||||||
return `timezone(${timezone(tz)}, ${t})`;
|
|
||||||
}
|
|
||||||
return t;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Epoch should be an ISO timestamp literal without time zone
|
|
||||||
// (it is interpreted as in the defined timezone for the input time)
|
|
||||||
// It can be partial, e.g. 'YYYY', 'YYYY-MM', 'YYYY-MM-DDTHH', etc.
|
|
||||||
// Defaults are applied: YYYY=0001, MM=01, DD=01, HH=00, MM=00, S=00
|
|
||||||
// It returns a timestamp without time zone
|
|
||||||
function epochExpression(epoch) {
|
|
||||||
const format = /^(\d\d\d\d)(?:\-?(\d\d)(?:\-?(\d\d)(?:[T\s]?(\d\d)(?:(\d\d)(?:\:(\d\d))?)?)?)?)?$/;
|
|
||||||
const match = epoch.match(format) || [];
|
|
||||||
const year = match[1] || '0001';
|
|
||||||
const month = match[2] || '01';
|
|
||||||
const day = match[3] || '01';
|
|
||||||
const hour = match[4] || '00';
|
|
||||||
const minute = match[5] || '00';
|
|
||||||
const second = match[6]t || '00';
|
|
||||||
epoch = `${year}-${month}-${day}T${hour}:${minute}:${second}`;
|
|
||||||
return `TIMESTAMP '${epoch}'`;
|
|
||||||
}
|
|
||||||
|
|
||||||
function cyclicSqlExpr(time, timeZone, groupBy, count = 1) {
|
|
||||||
[groupBy, count] = cyclicNormalize(groupBy, count);
|
|
||||||
const column = timeExpression(time, timeZone);
|
|
||||||
|
|
||||||
if (count === 1) {
|
|
||||||
switch (groupBy) {
|
|
||||||
case 'dayOfWeek':
|
|
||||||
// 1 = monday; 7 = sunday;
|
|
||||||
return `date_part('isodow', ${column})`;
|
|
||||||
|
|
||||||
case 'dayOfMonth':
|
|
||||||
// result: 1-31
|
|
||||||
return `date_part('day', ${column})`;
|
|
||||||
|
|
||||||
case 'dayOfYear':
|
|
||||||
// result: 1-366
|
|
||||||
return `date_part('doy', ${column})`;
|
|
||||||
|
|
||||||
case 'hourOfDay':
|
|
||||||
// result: 0-23
|
|
||||||
return `date_part('hour', ${column})`;
|
|
||||||
|
|
||||||
case 'monthOfYear':
|
|
||||||
// result 1-12
|
|
||||||
return `date_part('month', ${column})`;
|
|
||||||
|
|
||||||
case 'quarterOfYear':
|
|
||||||
// result 1-4
|
|
||||||
return `date_part('quarter', ${column})`;
|
|
||||||
|
|
||||||
case 'semesterOfYear':
|
|
||||||
// result 1-2
|
|
||||||
return `FLOOR((date_part('month', ${column})-1)/6.0) + 1`;
|
|
||||||
|
|
||||||
case 'trimesterOfYear':
|
|
||||||
// result 1-3
|
|
||||||
return `FLOOR((date_part('month', ${column})-1)/4.0) + 1`;
|
|
||||||
|
|
||||||
case 'weekOfYear':
|
|
||||||
// result 1-53
|
|
||||||
return `date_part('week', ${column})`;
|
|
||||||
|
|
||||||
case 'minuteOfHour':
|
|
||||||
// result 0-59
|
|
||||||
return `date_part('minute', ${column})`;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
throw new Error(`Invalid cyclic time grouping ${groupBy} with count ${count}`)
|
|
||||||
|
const VALID_CYCLIC_GROUPINGS = Object.keys(cyclicParts);
|
||||||
|
const VALID_SERIAL_GROUPINGS = Object.keys(serialParts);
|
||||||
|
const VALID_ISO_GROUPINGS = Object.keys(isoParts);
|
||||||
|
|
||||||
|
const MONTH_GROUPING = {
|
||||||
|
3: 'quarterOfYear',
|
||||||
|
6: 'semesterOfYear',
|
||||||
|
4: 'trimesterOfYear'
|
||||||
|
};
|
||||||
|
|
||||||
|
function cyclicCheckParams(params) {
|
||||||
|
const errors = [];
|
||||||
|
if (!VALID_CYCLIC_GROUPINGS.includes(params.grouping)) {
|
||||||
|
errors.push(`Invalid grouping "${params.grouping}"`);
|
||||||
|
} else {
|
||||||
|
if (params.count && params.count > 1) {
|
||||||
|
let fixed = false;
|
||||||
|
if (params.grouping === 'monthOfYear') {
|
||||||
|
const grouping = MONTH_GROUPING[params.count];
|
||||||
|
if (grouping) {
|
||||||
|
params.grouping = grouping;
|
||||||
|
params.count = 1;
|
||||||
|
fixed = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!fixed) {
|
||||||
|
errors.push(`Invalid count ${params.count} for cyclic ${params.grouping}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return errors;
|
||||||
}
|
}
|
||||||
|
|
||||||
function validateParameters(_params) {
|
function serialCheckParams(params) {
|
||||||
return true;
|
const errors = [];
|
||||||
|
if (!VALID_SERIAL_GROUPINGS.includes(params.grouping)) {
|
||||||
|
errors.push(`Invalid grouping "${params.grouping}"`);
|
||||||
}
|
}
|
||||||
|
return errors;
|
||||||
|
}
|
||||||
|
|
||||||
|
function isoCheckParams(params) {
|
||||||
|
const errors = [];
|
||||||
|
if (!VALID_ISO_GROUPINGS.includes(params.grouping)) {
|
||||||
|
errors.push(`Invalid grouping "${params.grouping}"`);
|
||||||
|
}
|
||||||
|
if (params.starting) {
|
||||||
|
errors.push("Parameter 'starting' not supported for ISO format");
|
||||||
|
}
|
||||||
|
return errors;
|
||||||
|
}
|
||||||
|
|
||||||
|
const CLASSIFIERS = {
|
||||||
|
cyclic: {
|
||||||
|
sqlExpr: cyclicSqlExpr,
|
||||||
|
checkParams: cyclicCheckParams
|
||||||
|
},
|
||||||
|
iso: {
|
||||||
|
sqlExpr: isoSqlExpr,
|
||||||
|
checkParams: isoCheckParams
|
||||||
|
},
|
||||||
|
serial: {
|
||||||
|
sqlExpr: serialSqlExpr,
|
||||||
|
checkParams: serialCheckParams
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
function isCyclic(groupBy) {
|
function isCyclic(groupBy) {
|
||||||
return groupBy.match(/.+By.+/);
|
return groupBy.match(/.+By.+/);
|
||||||
}
|
}
|
||||||
|
|
||||||
function classificationSql(params) {
|
function classifierFor(params) {
|
||||||
validateParameters(params);
|
let classifier = 'serial';
|
||||||
if (isCyclic(params.group_by)) {
|
if (params.grouping && isCyclic(params.grouping)) {
|
||||||
// TODO: validate group_by_count === 1, No epoch
|
classifier = 'cyclic';
|
||||||
return cyclicSqlExpr(
|
|
||||||
params.time,
|
|
||||||
params.timeZone,
|
|
||||||
params.groupBy,
|
|
||||||
params.groupByCount
|
|
||||||
);
|
|
||||||
} else if (params.format === 'iso') {
|
} else if (params.format === 'iso') {
|
||||||
// TODO: validate group_by_count === 1, No epoch
|
classifier = 'iso';
|
||||||
return isoSqlExpr(
|
}
|
||||||
params.time,
|
return CLASSIFIERS[classifier];
|
||||||
params.timeZone,
|
}
|
||||||
params.groupBy,
|
|
||||||
params.groupByCount
|
|
||||||
);
|
|
||||||
} else {
|
|
||||||
return serialSqlExpr(
|
|
||||||
params.time,
|
|
||||||
params.timeZone,
|
|
||||||
params.groupBy,
|
|
||||||
params.groupByCount,
|
|
||||||
params.starting
|
|
||||||
);
|
|
||||||
|
|
||||||
|
function classificationSql(params) {
|
||||||
|
const classifier = classifierFor(params);
|
||||||
|
validateParameters(params, classifier.checkParams);
|
||||||
|
return classifier.sqlExpr(params);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
module.exports = classificationSql;
|
module.exports = classificationSql;
|
Loading…
Reference in New Issue
Block a user