Merge pull request #723 from CartoDB/fix-time-series-issues

Fix time series issues
This commit is contained in:
Daniel 2017-08-08 10:51:13 +02:00 committed by GitHub
commit 26a967d0a7
3 changed files with 338 additions and 102 deletions

View File

@ -3,6 +3,14 @@
## 3.10.2
Released 2017-mm-dd
Announcements:
- Allow to override with any aggregation for histograms instantiated w/o aggregation.
Bug fixes:
- Apply timezone after truncating the minimun date for each bin to calculate timestamps in time-series.
- Support timestamp with timezones to calculate the number of bins in time-series.
- Fixed issue related to name collision while building time-series query.
## 3.10.1
Released 2017-08-04

View File

@ -9,42 +9,42 @@ var columnCastTpl = dot.template("date_part('epoch', {{=it.column}})");
var dateIntervalQueryTpl = dot.template([
'WITH',
'dates AS (',
'__cdb_dates AS (',
' SELECT',
' MAX({{=it.column}}::timestamp) AS _end,',
' MIN({{=it.column}}::timestamp) AS _start',
' FROM ({{=it.query}}) _cdb_source',
' MAX({{=it.column}}::timestamp) AS __cdb_end,',
' MIN({{=it.column}}::timestamp) AS __cdb_start',
' FROM ({{=it.query}}) __cdb_source',
'),',
'interval_in_days AS (',
'__cdb_interval_in_days AS (',
' SELECT' ,
' DATE_PART(\'day\', _end - _start) AS days',
' FROM dates',
' DATE_PART(\'day\', __cdb_end - __cdb_start) AS __cdb_days',
' FROM __cdb_dates',
'),',
'interval_in_hours AS (',
'__cdb_interval_in_hours AS (',
' SELECT',
' days * 24 + DATE_PART(\'hour\', _end - _start) AS hours',
' FROM interval_in_days, dates',
' __cdb_days * 24 + DATE_PART(\'hour\', __cdb_end - __cdb_start) AS __cdb_hours',
' FROM __cdb_interval_in_days, __cdb_dates',
'),',
'interval_in_minutes AS (',
'__cdb_interval_in_minutes AS (',
' SELECT',
' hours * 60 + DATE_PART(\'minute\', _end - _start) AS minutes',
' FROM interval_in_hours, dates',
' __cdb_hours * 60 + DATE_PART(\'minute\', __cdb_end - __cdb_start) AS __cdb_minutes',
' FROM __cdb_interval_in_hours, __cdb_dates',
'),',
'interval_in_seconds AS (',
'__cdb_interval_in_seconds AS (',
' SELECT',
' minutes * 60 + DATE_PART(\'second\', _end - _start) AS seconds',
' FROM interval_in_minutes, dates',
' __cdb_minutes * 60 + DATE_PART(\'second\', __cdb_end - __cdb_start) AS __cdb_seconds',
' FROM __cdb_interval_in_minutes, __cdb_dates',
')',
'SELECT',
' ROUND(days / 365) AS year,',
' ROUND(days / 90) AS quarter,',
' ROUND(days / 30) AS month,',
' ROUND(days / 7) AS week,',
' days AS day,',
' hours AS hour,',
' minutes AS minute,',
' seconds AS second',
'FROM interval_in_days, interval_in_hours, interval_in_minutes, interval_in_seconds'
' ROUND(__cdb_days / 365) AS year,',
' ROUND(__cdb_days / 90) AS quarter,',
' ROUND(__cdb_days / 30) AS month,',
' ROUND(__cdb_days / 7) AS week,',
' __cdb_days AS day,',
' __cdb_hours AS hour,',
' __cdb_minutes AS minute,',
' __cdb_seconds AS second',
'FROM __cdb_interval_in_days, __cdb_interval_in_hours, __cdb_interval_in_minutes, __cdb_interval_in_seconds'
].join('\n'));
var MAX_INTERVAL_VALUE = 366;
@ -52,9 +52,9 @@ var BIN_MIN_NUMBER = 6;
var BIN_MAX_NUMBER = 48;
var filteredQueryTpl = dot.template([
'filtered_source AS (',
'__cdb_filtered_source AS (',
' SELECT *',
' FROM ({{=it._query}}) _cdb_filtered_source',
' FROM ({{=it._query}}) __cdb_filtered_source_query',
' WHERE',
' {{=it._column}} IS NOT NULL',
' {{?it._isFloatColumn}}AND',
@ -67,74 +67,74 @@ var filteredQueryTpl = dot.template([
].join(' \n'));
var basicsQueryTpl = dot.template([
'basics AS (',
'__cdb_basics AS (',
' SELECT',
' max({{=it._column}}) AS max_val, min({{=it._column}}) AS min_val,',
' avg({{=it._column}}) AS avg_val, count(1) AS total_rows',
' FROM filtered_source',
' max({{=it._column}}) AS __cdb_max_val, min({{=it._column}}) AS __cdb_min_val,',
' avg({{=it._column}}) AS __cdb_avg_val, count(1) AS __cdb_total_rows',
' FROM __cdb_filtered_source',
')'
].join(' \n'));
var overrideBasicsQueryTpl = dot.template([
'basics AS (',
'__cdb_basics AS (',
' SELECT',
' max({{=it._end}}) AS max_val, min({{=it._start}}) AS min_val,',
' avg({{=it._column}}) AS avg_val, count(1) AS total_rows',
' FROM filtered_source',
' max({{=it._end}}) AS __cdb_max_val, min({{=it._start}}) AS __cdb_min_val,',
' avg({{=it._column}}) AS __cdb_avg_val, count(1) AS __cdb_total_rows',
' FROM __cdb_filtered_source',
')'
].join('\n'));
var iqrQueryTpl = dot.template([
'iqrange AS (',
' SELECT max(quartile_max) - min(quartile_max) AS iqr',
'__cdb_iqrange AS (',
' SELECT max(quartile_max) - min(quartile_max) AS __cdb_iqr',
' FROM (',
' SELECT quartile, max(_cdb_iqr_column) AS quartile_max from (',
' SELECT {{=it._column}} AS _cdb_iqr_column, ntile(4) over (order by {{=it._column}}',
' ) AS quartile',
' FROM filtered_source) _cdb_quartiles',
' FROM __cdb_filtered_source) _cdb_quartiles',
' WHERE quartile = 1 or quartile = 3',
' GROUP BY quartile',
' ) _cdb_iqr',
' ) __cdb_iqr',
')'
].join('\n'));
var binsQueryTpl = dot.template([
'bins AS (',
' SELECT CASE WHEN total_rows = 0 OR iqr = 0',
'__cdb_bins AS (',
' SELECT CASE WHEN __cdb_total_rows = 0 OR __cdb_iqr = 0',
' THEN 1',
' ELSE GREATEST(',
' LEAST({{=it._minBins}}, CAST(total_rows AS INT)),',
' LEAST({{=it._minBins}}, CAST(__cdb_total_rows AS INT)),',
' LEAST(',
' CAST(((max_val - min_val) / (2 * iqr * power(total_rows, 1/3))) AS INT),',
' CAST(((__cdb_max_val - __cdb_min_val) / (2 * __cdb_iqr * power(__cdb_total_rows, 1/3))) AS INT),',
' {{=it._maxBins}}',
' )',
' )',
' END AS bins_number',
' FROM basics, iqrange, filtered_source',
' END AS __cdb_bins_number',
' FROM __cdb_basics, __cdb_iqrange, __cdb_filtered_source',
' LIMIT 1',
')'
].join('\n'));
var overrideBinsQueryTpl = dot.template([
'bins AS (',
' SELECT {{=it._bins}} AS bins_number',
'__cdb_bins AS (',
' SELECT {{=it._bins}} AS __cdb_bins_number',
')'
].join('\n'));
var nullsQueryTpl = dot.template([
'nulls AS (',
'__cdb_nulls AS (',
' SELECT',
' count(*) AS nulls_count',
' FROM ({{=it._query}}) _cdb_histogram_nulls',
' count(*) AS __cdb_nulls_count',
' FROM ({{=it._query}}) __cdb_histogram_nulls',
' WHERE {{=it._column}} IS NULL',
')'
].join('\n'));
var infinitiesQueryTpl = dot.template([
'infinities AS (',
'__cdb_infinities AS (',
' SELECT',
' count(*) AS infinities_count',
' FROM ({{=it._query}}) _cdb_histogram_infinities',
' count(*) AS __cdb_infinities_count',
' FROM ({{=it._query}}) __cdb_infinities_query',
' WHERE',
' {{=it._column}} = \'infinity\'::float',
' OR',
@ -143,121 +143,128 @@ var infinitiesQueryTpl = dot.template([
].join('\n'));
var nansQueryTpl = dot.template([
'nans AS (',
'__cdb_nans AS (',
' SELECT',
' count(*) AS nans_count',
' FROM ({{=it._query}}) _cdb_histogram_infinities',
' count(*) AS __cdb_nans_count',
' FROM ({{=it._query}}) __cdb_nans_query',
' WHERE {{=it._column}} = \'NaN\'::float',
')'
].join('\n'));
var histogramQueryTpl = dot.template([
'SELECT',
' (max_val - min_val) / cast(bins_number as float) AS bin_width,',
' bins_number,',
' nulls_count,',
' {{?it._isFloatColumn}}infinities_count,',
' nans_count,{{?}}',
' avg_val,',
' CASE WHEN min_val = max_val',
' (__cdb_max_val - __cdb_min_val) / cast(__cdb_bins_number as float) AS bin_width,',
' __cdb_bins_number AS bins_number,',
' __cdb_nulls_count AS nulls_count,',
' {{?it._isFloatColumn}}__cdb_infinities_count AS infinities_count,',
' __cdb_nans_count AS nans_count,{{?}}',
' __cdb_avg_val AS avg_val,',
' CASE WHEN __cdb_min_val = __cdb_max_val',
' THEN 0',
' ELSE GREATEST(1, LEAST(WIDTH_BUCKET({{=it._column}}, min_val, max_val, bins_number), bins_number)) - 1',
' ELSE GREATEST(',
' 1,',
' LEAST(',
' WIDTH_BUCKET({{=it._column}}, __cdb_min_val, __cdb_max_val, __cdb_bins_number),',
' __cdb_bins_number',
' )',
' ) - 1',
' END AS bin,',
' min({{=it._column}})::numeric AS min,',
' max({{=it._column}})::numeric AS max,',
' avg({{=it._column}})::numeric AS avg,',
' count(*) AS freq',
'FROM filtered_source, basics, nulls, bins{{?it._isFloatColumn}}, infinities, nans{{?}}',
'FROM __cdb_filtered_source, __cdb_basics, __cdb_nulls,',
' __cdb_bins{{?it._isFloatColumn}}, __cdb_infinities, __cdb_nans{{?}}',
'GROUP BY bin, bins_number, bin_width, nulls_count,',
' avg_val{{?it._isFloatColumn}}, infinities_count, nans_count{{?}}',
'ORDER BY bin'
].join('\n'));
var dateBasicsQueryTpl = dot.template([
'basics AS (',
'__cdb_basics AS (',
' SELECT',
' max(date_part(\'epoch\', {{=it._column}})) AS max_val,',
' min(date_part(\'epoch\', {{=it._column}})) AS min_val,',
' avg(date_part(\'epoch\', {{=it._column}})) AS avg_val,',
' max(date_part(\'epoch\', {{=it._column}})) AS __cdb_max_val,',
' min(date_part(\'epoch\', {{=it._column}})) AS __cdb_min_val,',
' avg(date_part(\'epoch\', {{=it._column}})) AS __cdb_avg_val,',
' min(date_trunc(',
' \'{{=it._aggregation}}\', {{=it._column}} AT TIME ZONE \'{{=it._offset}}\'',
' )) AS start_date,',
' max({{=it._column}} AT TIME ZONE \'{{=it._offset}}\') AS end_date,',
' count(1) AS total_rows',
' FROM ({{=it._query}}) _cdb_basics',
' \'{{=it._aggregation}}\', {{=it._column}}::timestamp AT TIME ZONE \'{{=it._offset}}\'',
' )) AS __cdb_start_date,',
' max({{=it._column}}::timestamp AT TIME ZONE \'{{=it._offset}}\') AS __cdb_end_date,',
' count(1) AS __cdb_total_rows',
' FROM ({{=it._query}}) __cdb_basics_query',
')'
].join(' \n'));
var dateOverrideBasicsQueryTpl = dot.template([
'basics AS (',
'__cdb_basics AS (',
' SELECT',
' max({{=it._end}}) AS max_val,',
' min({{=it._start}}) AS min_val,',
' avg(date_part(\'epoch\', {{=it._column}})) AS avg_val,',
' max({{=it._end}}) AS __cdb_max_val,',
' min({{=it._start}}) AS __cdb_min_val,',
' avg(date_part(\'epoch\', {{=it._column}})) AS __cdb_avg_val,',
' min(',
' date_trunc(',
' \'{{=it._aggregation}}\',',
' TO_TIMESTAMP({{=it._start}})::timestamp AT TIME ZONE \'{{=it._offset}}\'',
' )',
' ) AS start_date,',
' ) AS __cdb_start_date,',
' max(',
' TO_TIMESTAMP({{=it._end}})::timestamp AT TIME ZONE \'{{=it._offset}}\'',
' ) AS end_date,',
' count(1) AS total_rows',
' FROM ({{=it._query}}) _cdb_basics',
' ) AS __cdb_end_date,',
' count(1) AS __cdb_total_rows',
' FROM ({{=it._query}}) __cdb_basics_query',
')'
].join(' \n'));
var dateBinsQueryTpl = dot.template([
'bins AS (',
'__cdb_bins AS (',
' SELECT',
' bins_array,',
' ARRAY_LENGTH(bins_array, 1) AS bins_number',
' __cdb_bins_array,',
' ARRAY_LENGTH(__cdb_bins_array, 1) AS __cdb_bins_number',
' FROM (',
' SELECT',
' ARRAY(',
' SELECT GENERATE_SERIES(',
' start_date::timestamptz,',
' end_date::timestamptz,',
' __cdb_start_date::timestamptz,',
' __cdb_end_date::timestamptz,',
' {{?it._aggregation==="quarter"}}\'3 month\'{{??}}\'1 {{=it._aggregation}}\'{{?}}::interval',
' )',
' ) AS bins_array',
' FROM basics',
' ) _cdb_bins_array',
' ) AS __cdb_bins_array',
' FROM __cdb_basics',
' ) __cdb_bins_array_query',
')'
].join('\n'));
var dateHistogramQueryTpl = dot.template([
'SELECT',
' (max_val - min_val) / cast(bins_number as float) AS bin_width,',
' bins_number,',
' nulls_count,',
' CASE WHEN min_val = max_val',
' (__cdb_max_val - __cdb_min_val) / cast(__cdb_bins_number as float) AS bin_width,',
' __cdb_bins_number AS bins_number,',
' __cdb_nulls_count AS nulls_count,',
' CASE WHEN __cdb_min_val = __cdb_max_val',
' THEN 0',
' ELSE GREATEST(1, LEAST(',
' WIDTH_BUCKET(',
' {{=it._column}}::timestamp AT TIME ZONE \'{{=it._offset}}\',',
' bins_array',
' __cdb_bins_array',
' ),',
' bins_number',
' __cdb_bins_number',
' )) - 1',
' END AS bin,',
' min(',
' date_part(',
' \'epoch\', ',
' date_trunc(',
' \'{{=it._aggregation}}\', {{=it._column}}::timestamptz',
' \'{{=it._aggregation}}\', {{=it._column}}::timestamp AT TIME ZONE \'{{=it._offset}}\'',
' ) AT TIME ZONE \'{{=it._offset}}\'',
' )',
' )::numeric AS timestamp,',
' date_part(\'epoch\', start_date)::numeric AS timestamp_start,',
' date_part(\'epoch\', __cdb_start_date)::numeric AS timestamp_start,',
' min(date_part(\'epoch\', {{=it._column}}))::numeric AS min,',
' max(date_part(\'epoch\', {{=it._column}}))::numeric AS max,',
' avg(date_part(\'epoch\', {{=it._column}}))::numeric AS avg,',
' count(*) AS freq',
'FROM ({{=it._query}}) _cdb_histogram, basics, bins, nulls',
'FROM ({{=it._query}}) __cdb_histogram, __cdb_basics, __cdb_bins, __cdb_nulls',
'WHERE date_part(\'epoch\', {{=it._column}}) IS NOT NULL',
'GROUP BY bin, bins_number, bin_width, nulls_count, avg_val, start_date',
'GROUP BY bin, bins_number, bin_width, nulls_count, timestamp_start',
'ORDER BY bin'
].join('\n'));
@ -328,12 +335,16 @@ Histogram.prototype.sql = function(psql, override, callback) {
this._buildQuery(psql, override, callback);
};
Histogram.prototype.isDateHistogram = function (override) {
return this._columnType === 'date' && (this.aggregation !== undefined || override.aggregation !== undefined);
};
Histogram.prototype._buildQuery = function (psql, override, callback) {
var filteredQuery, basicsQuery, binsQuery;
var _column = this.column;
var _query = this.query;
if (this._columnType === 'date' && this.aggregation !== undefined) {
if (this.isDateHistogram(override)) {
return this._buildDateHistogramQuery(psql, override, callback);
}

View File

@ -888,7 +888,6 @@ describe('histogram-dates: aggregation input value', function() {
});
});
describe('histogram-dates: timestamp starts at epoch', function() {
afterEach(function(done) {
@ -958,3 +957,221 @@ describe('histogram-dates: timestamp starts at epoch', function() {
});
});
});
describe('histogram-dates: trunc timestamp for each bin respecting user\'s timezone', function() {
afterEach(function(done) {
if (this.testClient) {
this.testClient.drain(done);
} else {
done();
}
});
var mapConfig = createMapConfig(
[
{
type: "cartodb",
options: {
source: {
id: "a0"
},
cartocss: "#points { marker-width: 10; marker-fill: red; }",
cartocss_version: "2.3.0"
}
}
],
{
timezone_epoch_histogram: {
source: {
id: 'a0'
},
type: 'histogram',
options: {
column: 'd',
aggregation: 'auto'
}
},
timezone_epoch_histogram_tz: {
source: {
id: 'a1'
},
type: 'histogram',
options: {
column: 'd',
aggregation: 'auto'
}
}
},
[
{
id: 'a0',
type: 'source',
params: {
query: [
'select null::geometry the_geom_webmercator, date AS d',
'from generate_series(',
'\'1970-01-01 00:00:00\'::timestamp,',
'\'1970-01-01 01:59:00\'::timestamp,',
' \'1 minute\'::interval',
') date'
].join(' ')
}
},
{
id: 'a1',
type: 'source',
params: {
query: [
'select null::geometry the_geom_webmercator, date AS d',
'from generate_series(',
'\'1970-01-01 00:00:00\'::timestamptz,',
'\'1970-01-01 01:59:00\'::timestamptz,',
' \'1 minute\'::interval',
') date'
].join(' ')
}
}
]
);
var dateHistogramsUseCases = [{
desc: 'supporting timestamp with offset',
dataviewId: 'timezone_epoch_histogram_tz'
}, {
desc: 'supporting timestamp without offset',
dataviewId: 'timezone_epoch_histogram'
}];
dateHistogramsUseCases.forEach(function (test) {
it('should return histogram with two buckets ' + test.desc , function(done) {
this.testClient = new TestClient(mapConfig, 1234);
const override = {
aggregation: 'day',
offset: '-3600'
};
this.testClient.getDataview(test.dataviewId, override, function(err, dataview) {
assert.ifError(err);
var OFFSET_IN_MINUTES = -1 * 60; // GMT-01
var initialTimestamp = '1969-12-31T00:00:00-01:00';
var binsStartInMilliseconds = dataview.bins_start * 1000;
var binsStartFormatted = moment.utc(binsStartInMilliseconds)
.utcOffset(OFFSET_IN_MINUTES)
.format();
assert.equal(binsStartFormatted, initialTimestamp);
dataview.bins.forEach(function (bin, index) {
var binTimestampExpected = moment.utc(initialTimestamp)
.utcOffset(OFFSET_IN_MINUTES)
.add(index, override.aggregation)
.format();
var binsTimestampInMilliseconds = bin.timestamp * 1000;
var binTimestampFormatted = moment.utc(binsTimestampInMilliseconds)
.utcOffset(OFFSET_IN_MINUTES)
.format();
assert.equal(binTimestampFormatted, binTimestampExpected);
assert.ok(bin.timestamp <= bin.min, 'bin timestamp < bin min: ' + JSON.stringify(bin));
assert.ok(bin.min <= bin.max, 'bin min < bin max: ' + JSON.stringify(bin));
});
done();
});
});
});
});
describe('histogram: be able to override with aggregation for histograms instantiated w/o aggregation', function() {
afterEach(function(done) {
if (this.testClient) {
this.testClient.drain(done);
} else {
done();
}
});
var mapConfig = createMapConfig(
[
{
type: "cartodb",
options: {
source: {
id: "a0"
},
cartocss: "#points { marker-width: 10; marker-fill: red; }",
cartocss_version: "2.3.0"
}
}
],
{
timezone_epoch_histogram: {
source: {
id: 'a0'
},
type: 'histogram',
options: {
column: 'd',
}
}
},
[
{
id: 'a0',
type: 'source',
params: {
query: [
'select null::geometry the_geom_webmercator, date AS d',
'from generate_series(',
'\'1970-01-01 00:00:00\'::timestamp,',
'\'1970-01-01 01:59:00\'::timestamp,',
' \'1 minute\'::interval',
') date'
].join(' ')
}
}
]
);
it('should apply aggregation to the histogram', function(done) {
this.testClient = new TestClient(mapConfig, 1234);
const override = {
aggregation: 'day',
offset: '-3600'
};
this.testClient.getDataview('timezone_epoch_histogram', override, function(err, dataview) {
assert.ifError(err);
var OFFSET_IN_MINUTES = -1 * 60; // GMT-01
var initialTimestamp = '1969-12-31T00:00:00-01:00';
var binsStartInMilliseconds = dataview.bins_start * 1000;
var binsStartFormatted = moment.utc(binsStartInMilliseconds)
.utcOffset(OFFSET_IN_MINUTES)
.format();
assert.equal(binsStartFormatted, initialTimestamp);
dataview.bins.forEach(function (bin, index) {
var binTimestampExpected = moment.utc(initialTimestamp)
.utcOffset(OFFSET_IN_MINUTES)
.add(index, override.aggregation)
.format();
var binsTimestampInMilliseconds = bin.timestamp * 1000;
var binTimestampFormatted = moment.utc(binsTimestampInMilliseconds)
.utcOffset(OFFSET_IN_MINUTES)
.format();
assert.equal(binTimestampFormatted, binTimestampExpected);
assert.ok(bin.timestamp <= bin.min, 'bin timestamp < bin min: ' + JSON.stringify(bin));
assert.ok(bin.min <= bin.max, 'bin min < bin max: ' + JSON.stringify(bin));
});
done();
});
});
});