diff --git a/NEWS.md b/NEWS.md index 9ca6d4a4..bd89d748 100644 --- a/NEWS.md +++ b/NEWS.md @@ -3,6 +3,14 @@ ## 3.10.2 Released 2017-mm-dd +Announcements: + - Allow to override with any aggregation for histograms instantiated w/o aggregation. + +Bug fixes: + - Apply timezone after truncating the minimun date for each bin to calculate timestamps in time-series. + - Support timestamp with timezones to calculate the number of bins in time-series. + - Fixed issue related to name collision while building time-series query. + ## 3.10.1 Released 2017-08-04 diff --git a/lib/cartodb/models/dataview/histogram.js b/lib/cartodb/models/dataview/histogram.js index 76b42c03..c27c696f 100644 --- a/lib/cartodb/models/dataview/histogram.js +++ b/lib/cartodb/models/dataview/histogram.js @@ -9,42 +9,42 @@ var columnCastTpl = dot.template("date_part('epoch', {{=it.column}})"); var dateIntervalQueryTpl = dot.template([ 'WITH', - 'dates AS (', + '__cdb_dates AS (', ' SELECT', - ' MAX({{=it.column}}::timestamp) AS _end,', - ' MIN({{=it.column}}::timestamp) AS _start', - ' FROM ({{=it.query}}) _cdb_source', + ' MAX({{=it.column}}::timestamp) AS __cdb_end,', + ' MIN({{=it.column}}::timestamp) AS __cdb_start', + ' FROM ({{=it.query}}) __cdb_source', '),', - 'interval_in_days AS (', + '__cdb_interval_in_days AS (', ' SELECT' , - ' DATE_PART(\'day\', _end - _start) AS days', - ' FROM dates', + ' DATE_PART(\'day\', __cdb_end - __cdb_start) AS __cdb_days', + ' FROM __cdb_dates', '),', - 'interval_in_hours AS (', + '__cdb_interval_in_hours AS (', ' SELECT', - ' days * 24 + DATE_PART(\'hour\', _end - _start) AS hours', - ' FROM interval_in_days, dates', + ' __cdb_days * 24 + DATE_PART(\'hour\', __cdb_end - __cdb_start) AS __cdb_hours', + ' FROM __cdb_interval_in_days, __cdb_dates', '),', - 'interval_in_minutes AS (', + '__cdb_interval_in_minutes AS (', ' SELECT', - ' hours * 60 + DATE_PART(\'minute\', _end - _start) AS minutes', - ' FROM interval_in_hours, dates', + ' __cdb_hours * 60 + DATE_PART(\'minute\', __cdb_end - __cdb_start) AS __cdb_minutes', + ' FROM __cdb_interval_in_hours, __cdb_dates', '),', - 'interval_in_seconds AS (', + '__cdb_interval_in_seconds AS (', ' SELECT', - ' minutes * 60 + DATE_PART(\'second\', _end - _start) AS seconds', - ' FROM interval_in_minutes, dates', + ' __cdb_minutes * 60 + DATE_PART(\'second\', __cdb_end - __cdb_start) AS __cdb_seconds', + ' FROM __cdb_interval_in_minutes, __cdb_dates', ')', 'SELECT', - ' ROUND(days / 365) AS year,', - ' ROUND(days / 90) AS quarter,', - ' ROUND(days / 30) AS month,', - ' ROUND(days / 7) AS week,', - ' days AS day,', - ' hours AS hour,', - ' minutes AS minute,', - ' seconds AS second', - 'FROM interval_in_days, interval_in_hours, interval_in_minutes, interval_in_seconds' + ' ROUND(__cdb_days / 365) AS year,', + ' ROUND(__cdb_days / 90) AS quarter,', + ' ROUND(__cdb_days / 30) AS month,', + ' ROUND(__cdb_days / 7) AS week,', + ' __cdb_days AS day,', + ' __cdb_hours AS hour,', + ' __cdb_minutes AS minute,', + ' __cdb_seconds AS second', + 'FROM __cdb_interval_in_days, __cdb_interval_in_hours, __cdb_interval_in_minutes, __cdb_interval_in_seconds' ].join('\n')); var MAX_INTERVAL_VALUE = 366; @@ -52,9 +52,9 @@ var BIN_MIN_NUMBER = 6; var BIN_MAX_NUMBER = 48; var filteredQueryTpl = dot.template([ - 'filtered_source AS (', + '__cdb_filtered_source AS (', ' SELECT *', - ' FROM ({{=it._query}}) _cdb_filtered_source', + ' FROM ({{=it._query}}) __cdb_filtered_source_query', ' WHERE', ' {{=it._column}} IS NOT NULL', ' {{?it._isFloatColumn}}AND', @@ -67,74 +67,74 @@ var filteredQueryTpl = dot.template([ ].join(' \n')); var basicsQueryTpl = dot.template([ - 'basics AS (', + '__cdb_basics AS (', ' SELECT', - ' max({{=it._column}}) AS max_val, min({{=it._column}}) AS min_val,', - ' avg({{=it._column}}) AS avg_val, count(1) AS total_rows', - ' FROM filtered_source', + ' max({{=it._column}}) AS __cdb_max_val, min({{=it._column}}) AS __cdb_min_val,', + ' avg({{=it._column}}) AS __cdb_avg_val, count(1) AS __cdb_total_rows', + ' FROM __cdb_filtered_source', ')' ].join(' \n')); var overrideBasicsQueryTpl = dot.template([ - 'basics AS (', + '__cdb_basics AS (', ' SELECT', - ' max({{=it._end}}) AS max_val, min({{=it._start}}) AS min_val,', - ' avg({{=it._column}}) AS avg_val, count(1) AS total_rows', - ' FROM filtered_source', + ' max({{=it._end}}) AS __cdb_max_val, min({{=it._start}}) AS __cdb_min_val,', + ' avg({{=it._column}}) AS __cdb_avg_val, count(1) AS __cdb_total_rows', + ' FROM __cdb_filtered_source', ')' ].join('\n')); var iqrQueryTpl = dot.template([ - 'iqrange AS (', - ' SELECT max(quartile_max) - min(quartile_max) AS iqr', + '__cdb_iqrange AS (', + ' SELECT max(quartile_max) - min(quartile_max) AS __cdb_iqr', ' FROM (', ' SELECT quartile, max(_cdb_iqr_column) AS quartile_max from (', ' SELECT {{=it._column}} AS _cdb_iqr_column, ntile(4) over (order by {{=it._column}}', ' ) AS quartile', - ' FROM filtered_source) _cdb_quartiles', + ' FROM __cdb_filtered_source) _cdb_quartiles', ' WHERE quartile = 1 or quartile = 3', ' GROUP BY quartile', - ' ) _cdb_iqr', + ' ) __cdb_iqr', ')' ].join('\n')); var binsQueryTpl = dot.template([ - 'bins AS (', - ' SELECT CASE WHEN total_rows = 0 OR iqr = 0', + '__cdb_bins AS (', + ' SELECT CASE WHEN __cdb_total_rows = 0 OR __cdb_iqr = 0', ' THEN 1', ' ELSE GREATEST(', - ' LEAST({{=it._minBins}}, CAST(total_rows AS INT)),', + ' LEAST({{=it._minBins}}, CAST(__cdb_total_rows AS INT)),', ' LEAST(', - ' CAST(((max_val - min_val) / (2 * iqr * power(total_rows, 1/3))) AS INT),', + ' CAST(((__cdb_max_val - __cdb_min_val) / (2 * __cdb_iqr * power(__cdb_total_rows, 1/3))) AS INT),', ' {{=it._maxBins}}', ' )', ' )', - ' END AS bins_number', - ' FROM basics, iqrange, filtered_source', + ' END AS __cdb_bins_number', + ' FROM __cdb_basics, __cdb_iqrange, __cdb_filtered_source', ' LIMIT 1', ')' ].join('\n')); var overrideBinsQueryTpl = dot.template([ - 'bins AS (', - ' SELECT {{=it._bins}} AS bins_number', + '__cdb_bins AS (', + ' SELECT {{=it._bins}} AS __cdb_bins_number', ')' ].join('\n')); var nullsQueryTpl = dot.template([ - 'nulls AS (', + '__cdb_nulls AS (', ' SELECT', - ' count(*) AS nulls_count', - ' FROM ({{=it._query}}) _cdb_histogram_nulls', + ' count(*) AS __cdb_nulls_count', + ' FROM ({{=it._query}}) __cdb_histogram_nulls', ' WHERE {{=it._column}} IS NULL', ')' ].join('\n')); var infinitiesQueryTpl = dot.template([ - 'infinities AS (', + '__cdb_infinities AS (', ' SELECT', - ' count(*) AS infinities_count', - ' FROM ({{=it._query}}) _cdb_histogram_infinities', + ' count(*) AS __cdb_infinities_count', + ' FROM ({{=it._query}}) __cdb_infinities_query', ' WHERE', ' {{=it._column}} = \'infinity\'::float', ' OR', @@ -143,121 +143,128 @@ var infinitiesQueryTpl = dot.template([ ].join('\n')); var nansQueryTpl = dot.template([ - 'nans AS (', + '__cdb_nans AS (', ' SELECT', - ' count(*) AS nans_count', - ' FROM ({{=it._query}}) _cdb_histogram_infinities', + ' count(*) AS __cdb_nans_count', + ' FROM ({{=it._query}}) __cdb_nans_query', ' WHERE {{=it._column}} = \'NaN\'::float', ')' ].join('\n')); var histogramQueryTpl = dot.template([ 'SELECT', - ' (max_val - min_val) / cast(bins_number as float) AS bin_width,', - ' bins_number,', - ' nulls_count,', - ' {{?it._isFloatColumn}}infinities_count,', - ' nans_count,{{?}}', - ' avg_val,', - ' CASE WHEN min_val = max_val', + ' (__cdb_max_val - __cdb_min_val) / cast(__cdb_bins_number as float) AS bin_width,', + ' __cdb_bins_number AS bins_number,', + ' __cdb_nulls_count AS nulls_count,', + ' {{?it._isFloatColumn}}__cdb_infinities_count AS infinities_count,', + ' __cdb_nans_count AS nans_count,{{?}}', + ' __cdb_avg_val AS avg_val,', + ' CASE WHEN __cdb_min_val = __cdb_max_val', ' THEN 0', - ' ELSE GREATEST(1, LEAST(WIDTH_BUCKET({{=it._column}}, min_val, max_val, bins_number), bins_number)) - 1', + ' ELSE GREATEST(', + ' 1,', + ' LEAST(', + ' WIDTH_BUCKET({{=it._column}}, __cdb_min_val, __cdb_max_val, __cdb_bins_number),', + ' __cdb_bins_number', + ' )', + ' ) - 1', ' END AS bin,', ' min({{=it._column}})::numeric AS min,', ' max({{=it._column}})::numeric AS max,', ' avg({{=it._column}})::numeric AS avg,', ' count(*) AS freq', - 'FROM filtered_source, basics, nulls, bins{{?it._isFloatColumn}}, infinities, nans{{?}}', + 'FROM __cdb_filtered_source, __cdb_basics, __cdb_nulls,', + ' __cdb_bins{{?it._isFloatColumn}}, __cdb_infinities, __cdb_nans{{?}}', 'GROUP BY bin, bins_number, bin_width, nulls_count,', ' avg_val{{?it._isFloatColumn}}, infinities_count, nans_count{{?}}', 'ORDER BY bin' ].join('\n')); var dateBasicsQueryTpl = dot.template([ - 'basics AS (', + '__cdb_basics AS (', ' SELECT', - ' max(date_part(\'epoch\', {{=it._column}})) AS max_val,', - ' min(date_part(\'epoch\', {{=it._column}})) AS min_val,', - ' avg(date_part(\'epoch\', {{=it._column}})) AS avg_val,', + ' max(date_part(\'epoch\', {{=it._column}})) AS __cdb_max_val,', + ' min(date_part(\'epoch\', {{=it._column}})) AS __cdb_min_val,', + ' avg(date_part(\'epoch\', {{=it._column}})) AS __cdb_avg_val,', ' min(date_trunc(', - ' \'{{=it._aggregation}}\', {{=it._column}} AT TIME ZONE \'{{=it._offset}}\'', - ' )) AS start_date,', - ' max({{=it._column}} AT TIME ZONE \'{{=it._offset}}\') AS end_date,', - ' count(1) AS total_rows', - ' FROM ({{=it._query}}) _cdb_basics', + ' \'{{=it._aggregation}}\', {{=it._column}}::timestamp AT TIME ZONE \'{{=it._offset}}\'', + ' )) AS __cdb_start_date,', + ' max({{=it._column}}::timestamp AT TIME ZONE \'{{=it._offset}}\') AS __cdb_end_date,', + ' count(1) AS __cdb_total_rows', + ' FROM ({{=it._query}}) __cdb_basics_query', ')' ].join(' \n')); var dateOverrideBasicsQueryTpl = dot.template([ - 'basics AS (', + '__cdb_basics AS (', ' SELECT', - ' max({{=it._end}}) AS max_val,', - ' min({{=it._start}}) AS min_val,', - ' avg(date_part(\'epoch\', {{=it._column}})) AS avg_val,', + ' max({{=it._end}}) AS __cdb_max_val,', + ' min({{=it._start}}) AS __cdb_min_val,', + ' avg(date_part(\'epoch\', {{=it._column}})) AS __cdb_avg_val,', ' min(', ' date_trunc(', ' \'{{=it._aggregation}}\',', ' TO_TIMESTAMP({{=it._start}})::timestamp AT TIME ZONE \'{{=it._offset}}\'', ' )', - ' ) AS start_date,', + ' ) AS __cdb_start_date,', ' max(', ' TO_TIMESTAMP({{=it._end}})::timestamp AT TIME ZONE \'{{=it._offset}}\'', - ' ) AS end_date,', - ' count(1) AS total_rows', - ' FROM ({{=it._query}}) _cdb_basics', + ' ) AS __cdb_end_date,', + ' count(1) AS __cdb_total_rows', + ' FROM ({{=it._query}}) __cdb_basics_query', ')' ].join(' \n')); var dateBinsQueryTpl = dot.template([ - 'bins AS (', + '__cdb_bins AS (', ' SELECT', - ' bins_array,', - ' ARRAY_LENGTH(bins_array, 1) AS bins_number', + ' __cdb_bins_array,', + ' ARRAY_LENGTH(__cdb_bins_array, 1) AS __cdb_bins_number', ' FROM (', ' SELECT', ' ARRAY(', ' SELECT GENERATE_SERIES(', - ' start_date::timestamptz,', - ' end_date::timestamptz,', + ' __cdb_start_date::timestamptz,', + ' __cdb_end_date::timestamptz,', ' {{?it._aggregation==="quarter"}}\'3 month\'{{??}}\'1 {{=it._aggregation}}\'{{?}}::interval', ' )', - ' ) AS bins_array', - ' FROM basics', - ' ) _cdb_bins_array', + ' ) AS __cdb_bins_array', + ' FROM __cdb_basics', + ' ) __cdb_bins_array_query', ')' ].join('\n')); var dateHistogramQueryTpl = dot.template([ 'SELECT', - ' (max_val - min_val) / cast(bins_number as float) AS bin_width,', - ' bins_number,', - ' nulls_count,', - ' CASE WHEN min_val = max_val', + ' (__cdb_max_val - __cdb_min_val) / cast(__cdb_bins_number as float) AS bin_width,', + ' __cdb_bins_number AS bins_number,', + ' __cdb_nulls_count AS nulls_count,', + ' CASE WHEN __cdb_min_val = __cdb_max_val', ' THEN 0', ' ELSE GREATEST(1, LEAST(', ' WIDTH_BUCKET(', ' {{=it._column}}::timestamp AT TIME ZONE \'{{=it._offset}}\',', - ' bins_array', + ' __cdb_bins_array', ' ),', - ' bins_number', + ' __cdb_bins_number', ' )) - 1', ' END AS bin,', ' min(', ' date_part(', ' \'epoch\', ', ' date_trunc(', - ' \'{{=it._aggregation}}\', {{=it._column}}::timestamptz', + ' \'{{=it._aggregation}}\', {{=it._column}}::timestamp AT TIME ZONE \'{{=it._offset}}\'', ' ) AT TIME ZONE \'{{=it._offset}}\'', ' )', ' )::numeric AS timestamp,', - ' date_part(\'epoch\', start_date)::numeric AS timestamp_start,', + ' date_part(\'epoch\', __cdb_start_date)::numeric AS timestamp_start,', ' min(date_part(\'epoch\', {{=it._column}}))::numeric AS min,', ' max(date_part(\'epoch\', {{=it._column}}))::numeric AS max,', ' avg(date_part(\'epoch\', {{=it._column}}))::numeric AS avg,', ' count(*) AS freq', - 'FROM ({{=it._query}}) _cdb_histogram, basics, bins, nulls', + 'FROM ({{=it._query}}) __cdb_histogram, __cdb_basics, __cdb_bins, __cdb_nulls', 'WHERE date_part(\'epoch\', {{=it._column}}) IS NOT NULL', - 'GROUP BY bin, bins_number, bin_width, nulls_count, avg_val, start_date', + 'GROUP BY bin, bins_number, bin_width, nulls_count, timestamp_start', 'ORDER BY bin' ].join('\n')); @@ -328,12 +335,16 @@ Histogram.prototype.sql = function(psql, override, callback) { this._buildQuery(psql, override, callback); }; +Histogram.prototype.isDateHistogram = function (override) { + return this._columnType === 'date' && (this.aggregation !== undefined || override.aggregation !== undefined); +}; + Histogram.prototype._buildQuery = function (psql, override, callback) { var filteredQuery, basicsQuery, binsQuery; var _column = this.column; var _query = this.query; - if (this._columnType === 'date' && this.aggregation !== undefined) { + if (this.isDateHistogram(override)) { return this._buildDateHistogramQuery(psql, override, callback); } diff --git a/test/acceptance/dataviews/histogram.js b/test/acceptance/dataviews/histogram.js index a75cf2b1..3baf002b 100644 --- a/test/acceptance/dataviews/histogram.js +++ b/test/acceptance/dataviews/histogram.js @@ -888,7 +888,6 @@ describe('histogram-dates: aggregation input value', function() { }); }); - describe('histogram-dates: timestamp starts at epoch', function() { afterEach(function(done) { @@ -958,3 +957,221 @@ describe('histogram-dates: timestamp starts at epoch', function() { }); }); }); + +describe('histogram-dates: trunc timestamp for each bin respecting user\'s timezone', function() { + + afterEach(function(done) { + if (this.testClient) { + this.testClient.drain(done); + } else { + done(); + } + }); + + var mapConfig = createMapConfig( + [ + { + type: "cartodb", + options: { + source: { + id: "a0" + }, + cartocss: "#points { marker-width: 10; marker-fill: red; }", + cartocss_version: "2.3.0" + } + } + ], + { + timezone_epoch_histogram: { + source: { + id: 'a0' + }, + type: 'histogram', + options: { + column: 'd', + aggregation: 'auto' + } + }, + timezone_epoch_histogram_tz: { + source: { + id: 'a1' + }, + type: 'histogram', + options: { + column: 'd', + aggregation: 'auto' + } + } + }, + [ + { + id: 'a0', + type: 'source', + params: { + query: [ + 'select null::geometry the_geom_webmercator, date AS d', + 'from generate_series(', + '\'1970-01-01 00:00:00\'::timestamp,', + '\'1970-01-01 01:59:00\'::timestamp,', + ' \'1 minute\'::interval', + ') date' + ].join(' ') + } + }, + { + id: 'a1', + type: 'source', + params: { + query: [ + 'select null::geometry the_geom_webmercator, date AS d', + 'from generate_series(', + '\'1970-01-01 00:00:00\'::timestamptz,', + '\'1970-01-01 01:59:00\'::timestamptz,', + ' \'1 minute\'::interval', + ') date' + ].join(' ') + } + } + ] + ); + + var dateHistogramsUseCases = [{ + desc: 'supporting timestamp with offset', + dataviewId: 'timezone_epoch_histogram_tz' + }, { + desc: 'supporting timestamp without offset', + dataviewId: 'timezone_epoch_histogram' + }]; + + dateHistogramsUseCases.forEach(function (test) { + it('should return histogram with two buckets ' + test.desc , function(done) { + this.testClient = new TestClient(mapConfig, 1234); + + const override = { + aggregation: 'day', + offset: '-3600' + }; + + this.testClient.getDataview(test.dataviewId, override, function(err, dataview) { + assert.ifError(err); + + var OFFSET_IN_MINUTES = -1 * 60; // GMT-01 + var initialTimestamp = '1969-12-31T00:00:00-01:00'; + var binsStartInMilliseconds = dataview.bins_start * 1000; + var binsStartFormatted = moment.utc(binsStartInMilliseconds) + .utcOffset(OFFSET_IN_MINUTES) + .format(); + assert.equal(binsStartFormatted, initialTimestamp); + + dataview.bins.forEach(function (bin, index) { + var binTimestampExpected = moment.utc(initialTimestamp) + .utcOffset(OFFSET_IN_MINUTES) + .add(index, override.aggregation) + .format(); + var binsTimestampInMilliseconds = bin.timestamp * 1000; + var binTimestampFormatted = moment.utc(binsTimestampInMilliseconds) + .utcOffset(OFFSET_IN_MINUTES) + .format(); + + assert.equal(binTimestampFormatted, binTimestampExpected); + assert.ok(bin.timestamp <= bin.min, 'bin timestamp < bin min: ' + JSON.stringify(bin)); + assert.ok(bin.min <= bin.max, 'bin min < bin max: ' + JSON.stringify(bin)); + }); + + done(); + }); + }); + }); +}); + + +describe('histogram: be able to override with aggregation for histograms instantiated w/o aggregation', function() { + + afterEach(function(done) { + if (this.testClient) { + this.testClient.drain(done); + } else { + done(); + } + }); + + var mapConfig = createMapConfig( + [ + { + type: "cartodb", + options: { + source: { + id: "a0" + }, + cartocss: "#points { marker-width: 10; marker-fill: red; }", + cartocss_version: "2.3.0" + } + } + ], + { + timezone_epoch_histogram: { + source: { + id: 'a0' + }, + type: 'histogram', + options: { + column: 'd', + } + } + }, + [ + { + id: 'a0', + type: 'source', + params: { + query: [ + 'select null::geometry the_geom_webmercator, date AS d', + 'from generate_series(', + '\'1970-01-01 00:00:00\'::timestamp,', + '\'1970-01-01 01:59:00\'::timestamp,', + ' \'1 minute\'::interval', + ') date' + ].join(' ') + } + } + ] + ); + + it('should apply aggregation to the histogram', function(done) { + this.testClient = new TestClient(mapConfig, 1234); + + const override = { + aggregation: 'day', + offset: '-3600' + }; + + this.testClient.getDataview('timezone_epoch_histogram', override, function(err, dataview) { + assert.ifError(err); + + var OFFSET_IN_MINUTES = -1 * 60; // GMT-01 + var initialTimestamp = '1969-12-31T00:00:00-01:00'; + var binsStartInMilliseconds = dataview.bins_start * 1000; + var binsStartFormatted = moment.utc(binsStartInMilliseconds) + .utcOffset(OFFSET_IN_MINUTES) + .format(); + assert.equal(binsStartFormatted, initialTimestamp); + + dataview.bins.forEach(function (bin, index) { + var binTimestampExpected = moment.utc(initialTimestamp) + .utcOffset(OFFSET_IN_MINUTES) + .add(index, override.aggregation) + .format(); + var binsTimestampInMilliseconds = bin.timestamp * 1000; + var binTimestampFormatted = moment.utc(binsTimestampInMilliseconds) + .utcOffset(OFFSET_IN_MINUTES) + .format(); + + assert.equal(binTimestampFormatted, binTimestampExpected); + assert.ok(bin.timestamp <= bin.min, 'bin timestamp < bin min: ' + JSON.stringify(bin)); + assert.ok(bin.min <= bin.max, 'bin min < bin max: ' + JSON.stringify(bin)); + }); + + done(); + }); + }); +});