diff --git a/NEWS.md b/NEWS.md index 84cfb4bf..e70b8df2 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,8 +1,20 @@ # Changelog -## 4.2.1 +## 4.3.1 Released 2017-mm-dd +Announcements: + +## 4.3.0 +Released 2017-12-11 + +Announcements: +- Optimize Formula queries. +- Optimize Formula queries in overviews. +- Optimize Numeric Histogram queries. +- Optimize Date Histogram queries. +- Date Histograms: Now returns the same value for max/min/avg/timestamp per bin. +- Date Histograms: Now it should return the same no matter the DB/Client time zone. ## 4.2.0 Released 2017-12-04 diff --git a/lib/cartodb/models/dataview/formula.js b/lib/cartodb/models/dataview/formula.js index daaeaf0c..df009e4b 100644 --- a/lib/cartodb/models/dataview/formula.js +++ b/lib/cartodb/models/dataview/formula.js @@ -1,34 +1,14 @@ const BaseDataview = require('./base'); const debug = require('debug')('windshaft:dataview:formula'); +const utils = require('../../utils/query-utils'); -const countInfinitiesQueryTpl = ctx => ` - SELECT count(1) FROM (${ctx.query}) __cdb_formula_infinities - WHERE ${ctx.column} = 'infinity'::float OR ${ctx.column} = '-infinity'::float -`; - -const countNansQueryTpl = ctx => ` - SELECT count(1) FROM (${ctx.query}) __cdb_formula_nans - WHERE ${ctx.column} = 'NaN'::float -`; - -const filterOutSpecialNumericValuesTpl = ctx => ` - WHERE - ${ctx.column} != 'infinity'::float - AND - ${ctx.column} != '-infinity'::float - AND - ${ctx.column} != 'NaN'::float -`; - -const formulaQueryTpl = ctx => ` - SELECT - ${ctx.operation}(${ctx.column}) AS result, - (SELECT count(1) FROM (${ctx.query}) _cdb_formula_nulls WHERE ${ctx.column} IS NULL) AS nulls_count - ${ctx.isFloatColumn ? `,(${countInfinitiesQueryTpl(ctx)}) AS infinities_count` : ''} - ${ctx.isFloatColumn ? `,(${countNansQueryTpl(ctx)}) AS nans_count` : ''} - FROM (${ctx.query}) __cdb_formula - ${ctx.isFloatColumn && ctx.operation !== 'count' ? `${filterOutSpecialNumericValuesTpl(ctx)}` : ''} -`; +const formulaQueryTpl = ctx => +`SELECT + ${ctx.operation}(${utils.handleFloatColumn(ctx)}) AS result, + ${utils.countNULLs(ctx)} AS nulls_count + ${ctx.isFloatColumn ? `,${utils.countInfinites(ctx)} AS infinities_count,` : ``} + ${ctx.isFloatColumn ? `${utils.countNaNs(ctx)} AS nans_count` : ``} +FROM (${ctx.query}) __cdb_formula`; const VALID_OPERATIONS = { count: true, diff --git a/lib/cartodb/models/dataview/histograms/date-histogram.js b/lib/cartodb/models/dataview/histograms/date-histogram.js index 34a42f9a..af151a72 100644 --- a/lib/cartodb/models/dataview/histograms/date-histogram.js +++ b/lib/cartodb/models/dataview/histograms/date-histogram.js @@ -1,5 +1,102 @@ const BaseHistogram = require('./base-histogram'); const debug = require('debug')('windshaft:dataview:date-histogram'); +const utils = require('../../../utils/query-utils'); + +/** + * Gets the name of a timezone with the same offset as the required + * using the pg_timezone_names table. We do this because it's simpler to pass + * the name than to pass the offset itself as PostgreSQL uses different + * sign convention. For example: TIME ZONE 'CET' is equal to TIME ZONE 'UTC-1', + * not 'UTC+1' which would be expected. + * Gives priority to Etc/GMT±N timezones but still support odd offsets like 8.5 + * hours for Asia/Pyongyang. + * It also makes it easier to, in the future, support the input of expected timezone + * instead of the offset; that is using 'Europe/Madrid' instead of + * '+3600' or '+7200'. The daylight saving status can be handled by postgres. + */ +const offsetNameQueryTpl = ctx => ` +WITH __wd_tz AS +( + SELECT name + FROM pg_timezone_names + WHERE utc_offset = interval '${ctx.offset} hours' + ORDER BY CASE WHEN name LIKE 'Etc/GMT%' THEN 0 ELSE 1 END + LIMIT 1 +),`; + +/** + * Function to get the subquery that places each row in its bin depending on + * the aggregation. Since the data stored is in epoch we need to adapt it to + * our timezone so when calling date_trunc it falls into the correct bin + */ +function dataBucketsQuery(ctx) { + var condition_str = ''; + + if (ctx.start !== 0) { + condition_str = `WHERE ${ctx.column} >= to_timestamp(${ctx.start})`; + } + if (ctx.end !== 0) { + if (condition_str === '') { + condition_str = `WHERE ${ctx.column} <= to_timestamp(${ctx.end})`; + } + else { + condition_str += ` and ${ctx.column} <= to_timestamp(${ctx.end})`; + } + } + + return ` +__wd_buckets AS +( + SELECT + date_trunc('${ctx.aggregation}', timezone(__wd_tz.name, ${ctx.column}::timestamptz)) as timestamp, + count(*) as freq, + ${utils.countNULLs(ctx)} as nulls_count + FROM + ( + ${ctx.query} + ) __source, __wd_tz + ${condition_str} + GROUP BY timestamp, __wd_tz.name +),`; +} + +/** + * Function that generates an array with all the possible bins between the + * start and end date. If not provided we use the min and max generated from + * the dataBucketsQuery + */ +function allBucketsArrayQuery(ctx) { + var extra_from = ``; + var series_start = ``; + var series_end = ``; + + if (ctx.start === 0) { + extra_from = `, __wd_buckets GROUP BY __wd_tz.name`; + series_start = `min(__wd_buckets.timestamp)`; + } else { + series_start = `date_trunc('${ctx.aggregation}', timezone(__wd_tz.name, to_timestamp(${ctx.start})))`; + } + + if (ctx.end === 0) { + extra_from = `, __wd_buckets GROUP BY __wd_tz.name`; + series_end = `max(__wd_buckets.timestamp)`; + } else { + series_end = `date_trunc('${ctx.aggregation}', timezone(__wd_tz.name, to_timestamp(${ctx.end})))`; + } + + return ` +__wd_all_buckets AS +( + SELECT ARRAY( + SELECT + generate_series( + ${series_start}, + ${series_end}, + interval '${ctx.interval}') as bin_start + FROM __wd_tz${extra_from} + ) as bins +)`; +} const dateIntervalQueryTpl = ctx => ` WITH @@ -41,107 +138,6 @@ const dateIntervalQueryTpl = ctx => ` FROM __cdb_interval_in_days, __cdb_interval_in_hours, __cdb_interval_in_minutes, __cdb_interval_in_seconds `; -const nullsQueryTpl = ctx => ` - __cdb_nulls AS ( - SELECT - count(*) AS __cdb_nulls_count - FROM (${ctx.query}) __cdb_histogram_nulls - WHERE ${ctx.column} IS NULL - ) -`; - -const dateBasicsQueryTpl = ctx => ` - __cdb_basics AS ( - SELECT - max(date_part('epoch', ${ctx.column})) AS __cdb_max_val, - min(date_part('epoch', ${ctx.column})) AS __cdb_min_val, - avg(date_part('epoch', ${ctx.column})) AS __cdb_avg_val, - min( - date_trunc( - '${ctx.aggregation}', ${ctx.column}::timestamp AT TIME ZONE '${ctx.offset}' - ) - ) AS __cdb_start_date, - max(${ctx.column}::timestamp AT TIME ZONE '${ctx.offset}') AS __cdb_end_date, - count(1) AS __cdb_total_rows - FROM (${ctx.query}) __cdb_basics_query - ) -`; - -const dateOverrideBasicsQueryTpl = ctx => ` - __cdb_basics AS ( - SELECT - max(${ctx.end})::float AS __cdb_max_val, - min(${ctx.start})::float AS __cdb_min_val, - avg(date_part('epoch', ${ctx.column})) AS __cdb_avg_val, - min( - date_trunc( - '${ctx.aggregation}', - TO_TIMESTAMP(${ctx.start})::timestamp AT TIME ZONE '${ctx.offset}' - ) - ) AS __cdb_start_date, - max( - TO_TIMESTAMP(${ctx.end})::timestamp AT TIME ZONE '${ctx.offset}' - ) AS __cdb_end_date, - count(1) AS __cdb_total_rows - FROM (${ctx.query}) __cdb_basics_query - ) -`; - -const dateBinsQueryTpl = ctx => ` - __cdb_bins AS ( - SELECT - __cdb_bins_array, - ARRAY_LENGTH(__cdb_bins_array, 1) AS __cdb_bins_number - FROM ( - SELECT - ARRAY( - SELECT GENERATE_SERIES( - __cdb_start_date::timestamptz, - __cdb_end_date::timestamptz, - ${ctx.aggregation === 'quarter' ? `'3 month'::interval` : `'1 ${ctx.aggregation}'::interval`} - ) - ) AS __cdb_bins_array - FROM __cdb_basics - ) __cdb_bins_array_query - ) -`; - -const dateHistogramQueryTpl = ctx => ` - SELECT - (__cdb_max_val - __cdb_min_val) / cast(__cdb_bins_number as float) AS bin_width, - __cdb_bins_number AS bins_number, - __cdb_nulls_count AS nulls_count, - CASE WHEN __cdb_min_val = __cdb_max_val - THEN 0 - ELSE GREATEST( - 1, - LEAST( - WIDTH_BUCKET( - ${ctx.column}::timestamp AT TIME ZONE '${ctx.offset}', - __cdb_bins_array - ), - __cdb_bins_number - ) - ) - 1 - END AS bin, - min( - date_part( - 'epoch', - date_trunc( - '${ctx.aggregation}', ${ctx.column}::timestamp AT TIME ZONE '${ctx.offset}' - ) AT TIME ZONE '${ctx.offset}' - ) - )::numeric AS timestamp, - date_part('epoch', __cdb_start_date)::numeric AS timestamp_start, - min(date_part('epoch', ${ctx.column}))::numeric AS min, - max(date_part('epoch', ${ctx.column}))::numeric AS max, - avg(date_part('epoch', ${ctx.column}))::numeric AS avg, - count(*) AS freq - FROM (${ctx.query}) __cdb_histogram, __cdb_basics, __cdb_bins, __cdb_nulls - WHERE date_part('epoch', ${ctx.column}) IS NOT NULL - GROUP BY bin, bins_number, bin_width, nulls_count, timestamp_start - ORDER BY bin -`; const MAX_INTERVAL_VALUE = 366; @@ -176,12 +172,21 @@ module.exports = class DateHistogram extends BaseHistogram { _buildQueryTpl (ctx) { return ` - WITH - ${this._hasOverridenRange(ctx.override) ? dateOverrideBasicsQueryTpl(ctx) : dateBasicsQueryTpl(ctx)}, - ${dateBinsQueryTpl(ctx)}, - ${nullsQueryTpl(ctx)} - ${dateHistogramQueryTpl(ctx)} - `; +${offsetNameQueryTpl(ctx)} +${dataBucketsQuery(ctx)} +${allBucketsArrayQuery(ctx)} +SELECT + array_position(__wd_all_buckets.bins, __wd_buckets.timestamp) - 1 as bin, + date_part('epoch', timezone(__wd_tz.name, __wd_buckets.timestamp)) AS timestamp, + __wd_buckets.freq as freq, + date_part('epoch', timezone(__wd_tz.name, (__wd_all_buckets.bins)[1])) as timestamp_start, + array_length(__wd_all_buckets.bins, 1) as bins_number, + date_part('epoch', interval '${ctx.interval}') as bin_width, + __wd_buckets.nulls_count as nulls_count +FROM __wd_buckets, __wd_all_buckets, __wd_tz +GROUP BY __wd_tz.name, __wd_all_buckets.bins, __wd_buckets.timestamp, __wd_buckets.nulls_count, __wd_buckets.freq +ORDER BY bin ASC; +`; } _buildQuery (psql, override, callback) { @@ -204,6 +209,9 @@ module.exports = class DateHistogram extends BaseHistogram { return null; } + var interval = this._getAggregation(override) === 'quarter' ? + '3 months' : '1 ' + this._getAggregation(override); + const histogramSql = this._buildQueryTpl({ override: override, query: this.query, @@ -211,7 +219,8 @@ module.exports = class DateHistogram extends BaseHistogram { aggregation: this._getAggregation(override), start: this._getBinStart(override), end: this._getBinEnd(override), - offset: this._parseOffset(override) + offset: this._parseOffset(override), + interval: interval }); debug(histogramSql); @@ -264,8 +273,8 @@ module.exports = class DateHistogram extends BaseHistogram { offset: this._getOffset(override), timestamp_start: firstRow.timestamp_start, - bin_width: firstRow.bin_width, - bins_count: firstRow.bins_number, + bin_width: firstRow.bin_width || 0, + bins_count: firstRow.bins_number || 0, bins_start: firstRow.timestamp, nulls: firstRow.nulls_count, infinities: firstRow.infinities_count, @@ -275,6 +284,10 @@ module.exports = class DateHistogram extends BaseHistogram { } _getBuckets (result) { + result.rows.forEach(function(row) { + row.min = row.max = row.avg = row.timestamp; + }); + return result.rows.map(({ bin, min, max, avg, freq, timestamp }) => ({ bin, min, max, avg, freq, timestamp })); } diff --git a/lib/cartodb/models/dataview/histograms/numeric-histogram.js b/lib/cartodb/models/dataview/histograms/numeric-histogram.js index ea191bea..a2d5662d 100644 --- a/lib/cartodb/models/dataview/histograms/numeric-histogram.js +++ b/lib/cartodb/models/dataview/histograms/numeric-histogram.js @@ -1,44 +1,25 @@ const BaseHistogram = require('./base-histogram'); const debug = require('debug')('windshaft:dataview:numeric-histogram'); +const utils = require('../../../utils/query-utils'); -const columnCastTpl = ctx => `date_part('epoch', ${ctx.column})`; - -const filterOutSpecialNumericValues = ctx => ` - ${ctx.column} != 'infinity'::float - AND - ${ctx.column} != '-infinity'::float - AND - ${ctx.column} != 'NaN'::float -`; - -const filteredQueryTpl = ctx => ` +/** Query to get min and max values from the query */ +const irqQueryTpl = ctx => ` __cdb_filtered_source AS ( SELECT * FROM (${ctx.query}) __cdb_filtered_source_query - WHERE ${ctx.column} IS NOT NULL - ${ctx.isFloatColumn ? `AND ${filterOutSpecialNumericValues(ctx)}` : ''} - ) -`; - -const basicsQueryTpl = ctx => ` + WHERE ${utils.handleFloatColumn(ctx)} IS NOT NULL + ), __cdb_basics AS ( SELECT - max(${ctx.column}) AS __cdb_max_val, min(${ctx.column}) AS __cdb_min_val, - avg(${ctx.column}) AS __cdb_avg_val, count(1) AS __cdb_total_rows + max(${ctx.column}) AS __cdb_max_val, + min(${ctx.column}) AS __cdb_min_val, + count(1) AS __cdb_total_rows FROM __cdb_filtered_source ) `; -const overrideBasicsQueryTpl = ctx => ` - __cdb_basics AS ( - SELECT - max(${ctx.end}) AS __cdb_max_val, min(${ctx.start}) AS __cdb_min_val, - avg(${ctx.column}) AS __cdb_avg_val, count(1) AS __cdb_total_rows - FROM __cdb_filtered_source - ) -`; - -const iqrQueryTpl = ctx => ` +/* Query to calculate the number of bins (needs irqQueryTpl before it*/ +const binsQueryTpl = ctx => ` __cdb_iqrange AS ( SELECT max(quartile_max) - min(quartile_max) AS __cdb_iqr FROM ( @@ -49,10 +30,7 @@ const iqrQueryTpl = ctx => ` WHERE quartile = 1 or quartile = 3 GROUP BY quartile ) __cdb_iqr - ) -`; - -const binsQueryTpl = ctx => ` + ), __cdb_bins AS ( SELECT CASE WHEN __cdb_total_rows = 0 OR __cdb_iqr = 0 @@ -70,83 +48,6 @@ const binsQueryTpl = ctx => ` ) `; -const overrideBinsQueryTpl = ctx => ` - __cdb_bins AS ( - SELECT ${ctx.override.bins} AS __cdb_bins_number - ) -`; - -const nullsQueryTpl = ctx => ` - __cdb_nulls AS ( - SELECT - count(*) AS __cdb_nulls_count - FROM (${ctx.query}) __cdb_histogram_nulls - WHERE ${ctx.column} IS NULL - ) -`; - -const infinitiesQueryTpl = ctx => ` - __cdb_infinities AS ( - SELECT - count(*) AS __cdb_infinities_count - FROM (${ctx.query}) __cdb_infinities_query - WHERE - ${ctx.column} = 'infinity'::float - OR - ${ctx.column} = '-infinity'::float - ) -`; - -const nansQueryTpl = ctx => ` - __cdb_nans AS ( - SELECT - count(*) AS __cdb_nans_count - FROM (${ctx.query}) __cdb_nans_query - WHERE ${ctx.column} = 'NaN'::float - ) -`; - -const specialNumericValuesColumnDefinitionTpl = () => ` - __cdb_infinities_count AS infinities_count, - __cdb_nans_count AS nans_count -`; - -const specialNumericValuesCTETpl = () => ` - __cdb_infinities, __cdb_nans -`; - -const specialNumericValuesColumnTpl = () => ` - infinities_count, nans_count -`; - -const histogramQueryTpl = ctx => ` - SELECT - (__cdb_max_val - __cdb_min_val) / cast(__cdb_bins_number as float) AS bin_width, - __cdb_bins_number AS bins_number, - __cdb_nulls_count AS nulls_count, - ${ctx.isFloatColumn ? `${specialNumericValuesColumnDefinitionTpl()},` : ''} - __cdb_avg_val AS avg_val, - CASE WHEN __cdb_min_val = __cdb_max_val - THEN 0 - ELSE GREATEST( - 1, - LEAST( - WIDTH_BUCKET(${ctx.column}, __cdb_min_val, __cdb_max_val, __cdb_bins_number), - __cdb_bins_number - ) - ) - 1 - END AS bin, - min(${ctx.column})::numeric AS min, - max(${ctx.column})::numeric AS max, - avg(${ctx.column})::numeric AS avg, - count(*) AS freq - FROM __cdb_filtered_source, __cdb_basics, __cdb_nulls, __cdb_bins - ${ctx.isFloatColumn ? `, ${specialNumericValuesCTETpl()}` : ''} - GROUP BY bin, bins_number, bin_width, nulls_count, avg_val - ${ctx.isFloatColumn ? `, ${specialNumericValuesColumnTpl()}` : ''} - ORDER BY bin -`; - const BIN_MIN_NUMBER = 6; const BIN_MAX_NUMBER = 48; @@ -167,14 +68,14 @@ module.exports = class NumericHistogram extends BaseHistogram { _buildQuery (psql, override, callback) { const histogramSql = this._buildQueryTpl({ - override: override, - column: this._columnType === 'date' ? columnCastTpl({ column: this.column }) : this.column, + column: this._columnType === 'date' ? utils.columnCastTpl({ column: this.column }) : this.column, isFloatColumn: this._columnType === 'float', query: this.query, start: this._getBinStart(override), end: this._getBinEnd(override), + bins: this._getBinsCount(override), minBins: BIN_MIN_NUMBER, - maxBins: BIN_MAX_NUMBER, + maxBins: BIN_MAX_NUMBER }); debug(histogramSql); @@ -182,19 +83,62 @@ module.exports = class NumericHistogram extends BaseHistogram { return callback(null, histogramSql); } + +/** + * ctx: Object with the following values + * ctx.column -- Column for the histogram + * ctx.isFloatColumn - Whether the column is float or not + * ctx.query -- Subquery to extract data + * ctx.start -- Start value for the bins. [>= end to force calculation] + * ctx.end -- End value for the bins. + * ctx.bins -- Numbers of bins to generate [<0 to force calculation] + * ctx.minBins - If !full min bins to calculate [Optional] + * ctx.maxBins - If !full max bins to calculate [Optional] + */ _buildQueryTpl (ctx) { + var extra_tables = ``; + var extra_queries = ``; + var extra_groupby = ``; + + if (ctx.start >= ctx.end) { + ctx.end = `__cdb_basics.__cdb_max_val`; + ctx.start = `__cdb_basics.__cdb_min_val`; + extra_groupby = `, __cdb_basics.__cdb_max_val, __cdb_basics.__cdb_min_val`; + extra_tables = `, __cdb_basics`; + extra_queries = `WITH ${irqQueryTpl(ctx)}`; + } + + if (ctx.bins <= 0) { + ctx.bins = `__cdb_bins.__cdb_bins_number`; + extra_groupby += `, __cdb_bins.__cdb_bins_number`; + extra_tables += `, __cdb_bins`; + extra_queries = `WITH ${irqQueryTpl(ctx)}, ${binsQueryTpl(ctx)}`; + } + return ` - WITH - ${filteredQueryTpl(ctx)}, - ${this._hasOverridenRange(ctx.override) ? overrideBasicsQueryTpl(ctx) : basicsQueryTpl(ctx)}, - ${this._hasOverridenBins(ctx.override) ? - overrideBinsQueryTpl(ctx) : - `${iqrQueryTpl(ctx)}, ${binsQueryTpl(ctx)}` - }, - ${nullsQueryTpl(ctx)} - ${ctx.isFloatColumn ? `,${infinitiesQueryTpl(ctx)}, ${nansQueryTpl(ctx)}` : ''} - ${histogramQueryTpl(ctx)} - `; +${extra_queries} +SELECT + (${ctx.end} - ${ctx.start}) / ${ctx.bins}::float AS bin_width, + ${ctx.bins} as bins_number, + ${utils.countNULLs(ctx)} AS nulls_count, + ${utils.countInfinites(ctx)} AS infinities_count, + ${utils.countNaNs(ctx)} AS nans_count, + min(${utils.handleFloatColumn(ctx)}) AS min, + max(${utils.handleFloatColumn(ctx)}) AS max, + avg(${utils.handleFloatColumn(ctx)}) AS avg, + sum(CASE WHEN (${utils.handleFloatColumn(ctx)} is not NULL) THEN 1 ELSE 0 END) as freq, + CASE WHEN ${ctx.start} = ${ctx.end} + THEN 0 + ELSE GREATEST(1, LEAST( + ${ctx.bins}, + WIDTH_BUCKET(${utils.handleFloatColumn(ctx)}, ${ctx.start}, ${ctx.end}, ${ctx.bins}))) - 1 + END AS bin +FROM +( + ${ctx.query} +) __cdb_filtered_source_query${extra_tables} +GROUP BY bin${extra_groupby} +ORDER BY bin;`; } _hasOverridenBins (override) { @@ -204,14 +148,31 @@ module.exports = class NumericHistogram extends BaseHistogram { _getSummary (result, override) { const firstRow = result.rows[0] || {}; + var total_nulls = 0; + var total_infinities = 0; + var total_nans = 0; + var total_avg = 0; + var total_count = 0; + + result.rows.forEach(function(row) { + total_nulls += row.nulls_count; + total_infinities += row.infinities_count; + total_nans += row.nans_count; + total_avg += row.avg * row.freq; + total_count += row.freq; + }); + if (total_count !== 0) { + total_avg /= total_count; + } + return { bin_width: firstRow.bin_width, bins_count: firstRow.bins_number, bins_start: this._populateBinStart(firstRow, override), - nulls: firstRow.nulls_count, - infinities: firstRow.infinities_count, - nans: firstRow.nans_count, - avg: firstRow.avg_val, + nulls: total_nulls, + infinities: total_infinities, + nans: total_nans, + avg: total_avg }; } diff --git a/lib/cartodb/models/dataview/overviews/formula.js b/lib/cartodb/models/dataview/overviews/formula.js index 64d612c9..2a97061e 100644 --- a/lib/cartodb/models/dataview/overviews/formula.js +++ b/lib/cartodb/models/dataview/overviews/formula.js @@ -1,55 +1,38 @@ var BaseOverviewsDataview = require('./base'); var BaseDataview = require('../formula'); var debug = require('debug')('windshaft:widget:formula:overview'); +const utils = require('../../../utils/query-utils'); var dot = require('dot'); dot.templateSettings.strip = false; -var formulaQueryTpls = { - 'count': dot.template([ - 'SELECT', - 'sum(_feature_count) AS result,', - '(SELECT count(1) FROM ({{=it._query}}) _cdb_formula_nulls WHERE {{=it._column}} IS NULL) AS nulls_count', - '{{?it._isFloatColumn}},(SELECT count(1) FROM ({{=it._query}}) _cdb_formula_infinities', - ' WHERE {{=it._column}} = \'infinity\'::float OR {{=it._column}} = \'-infinity\'::float) AS infinities_count,', - '(SELECT count(1) FROM ({{=it._query}}) _cdb_formula_nans', - ' WHERE {{=it._column}} = \'NaN\'::float) AS nans_count{{?}}', - 'FROM ({{=it._query}}) _cdb_formula' - ].join('\n')), - 'sum': dot.template([ - 'SELECT', - 'sum({{=it._column}}*_feature_count) AS result,', - '(SELECT count(1) FROM ({{=it._query}}) _cdb_formula_nulls WHERE {{=it._column}} IS NULL) AS nulls_count', - '{{?it._isFloatColumn}},(SELECT count(1) FROM ({{=it._query}}) _cdb_formula_infinities', - ' WHERE {{=it._column}} = \'infinity\'::float OR {{=it._column}} = \'-infinity\'::float) AS infinities_count', - ',(SELECT count(1) FROM ({{=it._query}}) _cdb_formula_nans', - ' WHERE {{=it._column}} = \'NaN\'::float) AS nans_count{{?}}', - 'FROM ({{=it._query}}) _cdb_formula', - '{{?it._isFloatColumn}}WHERE', - ' {{=it._column}} != \'infinity\'::float', - 'AND', - ' {{=it._column}} != \'-infinity\'::float', - 'AND', - ' {{=it._column}} != \'NaN\'::float{{?}}' - ].join('\n')), - 'avg': dot.template([ - 'SELECT', - 'sum({{=it._column}}*_feature_count)/sum(_feature_count) AS result,', - '(SELECT count(1) FROM ({{=it._query}}) _cdb_formula_nulls WHERE {{=it._column}} IS NULL) AS nulls_count', - '{{?it._isFloatColumn}},(SELECT count(1) FROM ({{=it._query}}) _cdb_formula_infinities', - ' WHERE {{=it._column}} = \'infinity\'::float OR {{=it._column}} = \'-infinity\'::float) AS infinities_count', - ',(SELECT count(1) FROM ({{=it._query}}) _cdb_formula_nans', - ' WHERE {{=it._column}} = \'NaN\'::float) AS nans_count{{?}}', - 'FROM ({{=it._query}}) _cdb_formula', - '{{?it._isFloatColumn}}WHERE', - ' {{=it._column}} != \'infinity\'::float', - 'AND', - ' {{=it._column}} != \'-infinity\'::float', - 'AND', - ' {{=it._column}} != \'NaN\'::float{{?}}' - ].join('\n')), +const VALID_OPERATIONS = { + count: true, + sum: true, + avg: true }; +/** Formulae to calculate the end result using _feature_count from the overview table*/ +function dataviewResult(ctx) { + switch (ctx.operation) { + case 'count': + return `sum(_feature_count)`; + case 'sum': + return `sum(${utils.handleFloatColumn(ctx)}*_feature_count)`; + case 'avg': + return `sum(${utils.handleFloatColumn(ctx)}*_feature_count)/sum(_feature_count) `; + } + return `${ctx.operation}(${utils.handleFloatColumn(ctx)})`; +} + +const formulaQueryTpl = ctx => +`SELECT + ${dataviewResult(ctx)} AS result, + ${utils.countNULLs(ctx)} AS nulls_count + ${ctx.isFloatColumn ? `,${utils.countInfinites(ctx)} AS infinities_count,` : ``} + ${ctx.isFloatColumn ? `${utils.countNaNs(ctx)} AS nans_count` : ``} +FROM (${ctx.query}) __cdb_formula`; + function Formula(query, options, queryRewriter, queryRewriteData, params, queries) { BaseOverviewsDataview.call(this, query, options, BaseDataview, queryRewriter, queryRewriteData, params, queries); this.column = options.column || '1'; @@ -65,36 +48,31 @@ module.exports = Formula; Formula.prototype.sql = function (psql, override, callback) { var self = this; - var formulaQueryTpl = formulaQueryTpls[this.operation]; - - if (formulaQueryTpl) { - // supported formula for use with overviews - if (this._isFloatColumn === null) { - this._isFloatColumn = false; - this.getColumnType(psql, this.column, this.queries.no_filters, function (err, type) { - if (!err && !!type) { - self._isFloatColumn = type.float; - } - self.sql(psql, override, callback); - }); - return null; - } - - var formulaSql = formulaQueryTpl({ - _isFloatColumn: this._isFloatColumn, - _query: this.rewrittenQuery(this.query), - _operation: this.operation, - _column: this.column - }); - - callback = callback || override; - - debug(formulaSql); - - return callback(null, formulaSql); + if (!VALID_OPERATIONS[this.operation]) { + return this.defaultSql(psql, override, callback); } + if (this._isFloatColumn === null) { + this._isFloatColumn = false; + this.getColumnType(psql, this.column, this.queries.no_filters, function (err, type) { + if (!err && !!type) { + self._isFloatColumn = type.float; + } + self.sql(psql, override, callback); + }); + return null; + } - // default behaviour - return this.defaultSql(psql, override, callback); + var formulaSql = formulaQueryTpl({ + isFloatColumn: this._isFloatColumn, + query: this.rewrittenQuery(this.query), + operation: this.operation, + column: this.column + }); + + callback = callback || override; + + debug(formulaSql); + + return callback(null, formulaSql); }; diff --git a/lib/cartodb/utils/query-utils.js b/lib/cartodb/utils/query-utils.js index 873151d8..6c80485a 100644 --- a/lib/cartodb/utils/query-utils.js +++ b/lib/cartodb/utils/query-utils.js @@ -40,3 +40,34 @@ module.exports.getAggregationMetadata = ctx => ` geom_type AS type FROM rowEstimation, geometryType; `; + +/** Cast the column to epoch */ +module.exports.columnCastTpl = function columnCastTpl(ctx) { + return `date_part('epoch', ${ctx.column})`; +}; + +/** If the column type is float, ignore any non numeric result (infinity / NaN) */ +module.exports.handleFloatColumn = function handleFloatColumn(ctx) { + return `${!ctx.isFloatColumn ? `${ctx.column}` : + `nullif(nullif(nullif(${ctx.column}, 'infinity'::float), '-infinity'::float), 'NaN'::float)` + }`; +}; + +/** Count NULL appearances */ +module.exports.countNULLs= function countNULLs(ctx) { + return `sum(CASE WHEN (${ctx.column} IS NULL) THEN 1 ELSE 0 END)`; +}; + +/** Count only infinity (positive and negative) appearances */ +module.exports.countInfinites = function countInfinites(ctx) { + return `${!ctx.isFloatColumn ? `0` : + `sum(CASE WHEN (${ctx.column} = 'infinity'::float OR ${ctx.column} = '-infinity'::float) THEN 1 ELSE 0 END)` + }`; +}; + +/** Count only NaNs appearances*/ +module.exports.countNaNs = function countNaNs(ctx) { + return `${!ctx.isFloatColumn ? `0` : + `sum(CASE WHEN (${ctx.column} = 'NaN'::float) THEN 1 ELSE 0 END)` + }`; +}; diff --git a/package.json b/package.json index 0fdec8bd..779b72c7 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "private": true, "name": "windshaft-cartodb", - "version": "4.2.1", + "version": "4.3.1", "description": "A map tile server for CartoDB", "keywords": [ "cartodb" diff --git a/test/acceptance/dataviews/histogram.js b/test/acceptance/dataviews/histogram.js index c9e16489..af8ce59d 100644 --- a/test/acceptance/dataviews/histogram.js +++ b/test/acceptance/dataviews/histogram.js @@ -186,7 +186,7 @@ describe('histogram-dataview for date column type', function() { }, minute_histogram: { source: { - id: 'minute-histogram-source' + id: 'minute-histogram-source-tz' }, type: 'histogram', options: { @@ -214,8 +214,8 @@ describe('histogram-dataview for date column type', function() { "params": { "query": [ "select null::geometry the_geom_webmercator, date AS d", - "from generate_series(", - "'2007-02-15 01:00:00'::timestamptz, '2008-04-09 01:00:00'::timestamptz, '1 day'::interval", + "from generate_series('2007-02-15 01:00:00+00'::timestamptz,", + "'2008-04-09 01:00:00+00'::timestamptz, '1 day'::interval", ") date" ].join(' ') } @@ -233,13 +233,13 @@ describe('histogram-dataview for date column type', function() { } }, { - "id": "minute-histogram-source", + "id": "minute-histogram-source-tz", "type": "source", "params": { "query": [ "select null::geometry the_geom_webmercator, date AS d", - "from generate_series(", - "'2007-02-15 23:50:00'::timestamp, '2007-02-16 00:10:00'::timestamp, '1 minute'::interval", + "from generate_series('2007-02-15 23:50:00+00'::timestamptz,", + "'2007-02-16 00:10:00+00'::timestamptz, '1 minute'::interval", ") date" ].join(' ') } @@ -256,6 +256,7 @@ describe('histogram-dataview for date column type', function() { }]; dateHistogramsUseCases.forEach(function (test) { + it('should create a date histogram aggregated in months (EDT) ' + test.desc, function (done) { var OFFSET_EDT_IN_MINUTES = -4 * 60; // EDT Eastern Daylight Time (GMT-4) in minutes @@ -323,7 +324,7 @@ describe('histogram-dataview for date column type', function() { assert.ok(!err, err); assert.equal(dataview.type, 'histogram'); assert.ok(dataview.bin_width > 0, 'Unexpected bin width: ' + dataview.bin_width); - assert.equal(dataview.bins.length, 6); + assert.equal(dataview.bins_count, 6); dataview.bins.forEach(function (bin) { assert.ok(bin.min <= bin.max, 'bin min < bin max: ' + JSON.stringify(bin)); }); @@ -335,7 +336,7 @@ describe('histogram-dataview for date column type', function() { it('should cast overridden start and end to float to avoid out of range errors ' + test.desc, function (done) { var params = { start: -2145916800, - end: 1009843199 + end: 1193792400 }; this.testClient = new TestClient(mapConfig, 1234); @@ -348,27 +349,6 @@ describe('histogram-dataview for date column type', function() { }); }); - it('should return same histogram ' + test.desc, function (done) { - var params = { - start: 1171501200, // 2007-02-15 01:00:00 = min(date_colum) - end: 1207702800 // 2008-04-09 01:00:00 = max(date_colum) - }; - - this.testClient = new TestClient(mapConfig, 1234); - this.testClient.getDataview(test.dataviewId, {}, function (err, dataview) { - assert.ok(!err, err); - - this.testClient = new TestClient(mapConfig, 1234); - this.testClient.getDataview(test.dataviewId, params, function (err, filteredDataview) { - assert.ok(!err, err); - - assert.deepEqual(dataview, filteredDataview); - done(); - }); - }); - }); - - it('should aggregate histogram overriding default offset to CEST ' + test.desc, function (done) { var OFFSET_CEST_IN_SECONDS = 2 * 3600; // Central European Summer Time (Daylight Saving Time) var OFFSET_CEST_IN_MINUTES = 2 * 60; // Central European Summer Time (Daylight Saving Time) @@ -533,6 +513,26 @@ describe('histogram-dataview for date column type', function() { }); }); + it('should return same histogram ', function (done) { + var params = { + start: 1171501200, // 2007-02-15 01:00:00 = min(date_colum) + end: 1207702800 // 2008-04-09 01:00:00 = max(date_colum) + }; + + this.testClient = new TestClient(mapConfig, 1234); + this.testClient.getDataview('datetime_histogram_tz', {}, function (err, dataview) { + assert.ok(!err, err); + + this.testClient = new TestClient(mapConfig, 1234); + this.testClient.getDataview('datetime_histogram_tz', params, function (err, filteredDataview) { + assert.ok(!err, err); + + assert.deepEqual(dataview, filteredDataview); + done(); + }); + }); + }); + it('should find the best aggregation (automatic mode) to build the histogram', function (done) { var params = {}; this.testClient = new TestClient(mapConfig, 1234); @@ -640,7 +640,7 @@ describe('histogram-dataview for date column type', function() { var dataviewWithDailyAggFixture = { aggregation: 'day', - bin_width: 600, + bin_width: 86400, bins_count: 2, bins_start: 1171497600, timestamp_start: 1171497600, @@ -650,17 +650,17 @@ describe('histogram-dataview for date column type', function() { [{ bin: 0, timestamp: 1171497600, - min: 1171583400, - max: 1171583940, - avg: 1171583670, + min: 1171497600, + max: 1171497600, + avg: 1171497600, freq: 10 }, { bin: 1, timestamp: 1171584000, min: 1171584000, - max: 1171584600, - avg: 1171584300, + max: 1171584000, + avg: 1171584000, freq: 11 }], type: 'histogram' @@ -687,19 +687,19 @@ describe('histogram-dataview for date column type', function() { var dataviewWithDailyAggAndOffsetFixture = { aggregation: 'day', - bin_width: 1200, + bin_width: 86400, bins_count: 1, bins_start: 1171501200, - timestamp_start: 1171497600, + timestamp_start: 1171501200, nulls: 0, offset: -3600, bins: [{ bin: 0, timestamp: 1171501200, - min: 1171583400, - max: 1171584600, - avg: 1171584000, + min: 1171501200, + max: 1171501200, + avg: 1171501200, freq: 21 }], type: 'histogram'