Merge branch 'master' into mapconfig-aggregation

This commit is contained in:
Daniel García Aubert 2017-12-11 17:36:58 +01:00
commit 006dd86614
8 changed files with 355 additions and 380 deletions

14
NEWS.md
View File

@ -1,8 +1,20 @@
# Changelog # Changelog
## 4.2.1 ## 4.3.1
Released 2017-mm-dd Released 2017-mm-dd
Announcements:
## 4.3.0
Released 2017-12-11
Announcements:
- Optimize Formula queries.
- Optimize Formula queries in overviews.
- Optimize Numeric Histogram queries.
- Optimize Date Histogram queries.
- Date Histograms: Now returns the same value for max/min/avg/timestamp per bin.
- Date Histograms: Now returns the same result regardless of the DB/client time zone.
## 4.2.0 ## 4.2.0
Released 2017-12-04 Released 2017-12-04

View File

@ -1,34 +1,14 @@
const BaseDataview = require('./base'); const BaseDataview = require('./base');
const debug = require('debug')('windshaft:dataview:formula'); const debug = require('debug')('windshaft:dataview:formula');
const utils = require('../../utils/query-utils');
const countInfinitiesQueryTpl = ctx => ` const formulaQueryTpl = ctx =>
SELECT count(1) FROM (${ctx.query}) __cdb_formula_infinities `SELECT
WHERE ${ctx.column} = 'infinity'::float OR ${ctx.column} = '-infinity'::float ${ctx.operation}(${utils.handleFloatColumn(ctx)}) AS result,
`; ${utils.countNULLs(ctx)} AS nulls_count
${ctx.isFloatColumn ? `,${utils.countInfinites(ctx)} AS infinities_count,` : ``}
const countNansQueryTpl = ctx => ` ${ctx.isFloatColumn ? `${utils.countNaNs(ctx)} AS nans_count` : ``}
SELECT count(1) FROM (${ctx.query}) __cdb_formula_nans FROM (${ctx.query}) __cdb_formula`;
WHERE ${ctx.column} = 'NaN'::float
`;
const filterOutSpecialNumericValuesTpl = ctx => `
WHERE
${ctx.column} != 'infinity'::float
AND
${ctx.column} != '-infinity'::float
AND
${ctx.column} != 'NaN'::float
`;
const formulaQueryTpl = ctx => `
SELECT
${ctx.operation}(${ctx.column}) AS result,
(SELECT count(1) FROM (${ctx.query}) _cdb_formula_nulls WHERE ${ctx.column} IS NULL) AS nulls_count
${ctx.isFloatColumn ? `,(${countInfinitiesQueryTpl(ctx)}) AS infinities_count` : ''}
${ctx.isFloatColumn ? `,(${countNansQueryTpl(ctx)}) AS nans_count` : ''}
FROM (${ctx.query}) __cdb_formula
${ctx.isFloatColumn && ctx.operation !== 'count' ? `${filterOutSpecialNumericValuesTpl(ctx)}` : ''}
`;
const VALID_OPERATIONS = { const VALID_OPERATIONS = {
count: true, count: true,

View File

@ -1,5 +1,102 @@
const BaseHistogram = require('./base-histogram'); const BaseHistogram = require('./base-histogram');
const debug = require('debug')('windshaft:dataview:date-histogram'); const debug = require('debug')('windshaft:dataview:date-histogram');
const utils = require('../../../utils/query-utils');
/**
 * Builds the leading `WITH __wd_tz AS (...)` CTE, which resolves the requested
 * UTC offset (ctx.offset, in hours) to the name of a timezone listed in
 * pg_timezone_names with that same offset.
 *
 * A name is passed around instead of the raw offset because PostgreSQL uses
 * the opposite sign convention for offsets: TIME ZONE 'CET' is equal to
 * TIME ZONE 'UTC-1', not 'UTC+1' as one would expect.
 * Names matching 'Etc/GMT%' are preferred, but any other name with the same
 * offset is accepted, so odd offsets such as 8.5 hours (Asia/Pyongyang) are
 * still supported.
 * Working with names also makes it easier to accept, in the future, a
 * timezone such as 'Europe/Madrid' instead of '+3600' or '+7200', leaving the
 * daylight saving handling to PostgreSQL.
 *
 * The returned fragment ends with a comma: further CTEs are expected to follow.
 */
const offsetNameQueryTpl = ({ offset }) => [
    '',
    'WITH __wd_tz AS',
    '(',
    'SELECT name',
    'FROM pg_timezone_names',
    `WHERE utc_offset = interval '${offset} hours'`,
    `ORDER BY CASE WHEN name LIKE 'Etc/GMT%' THEN 0 ELSE 1 END`,
    'LIMIT 1',
    '),'
].join('\n');
/**
 * Builds the `__wd_buckets` CTE: every source row is assigned to a bin by
 * truncating its date (moved to the timezone selected in `__wd_tz`) to the
 * requested aggregation, and the rows and NULLs of each bin are counted.
 *
 * ctx.start / ctx.end restrict the rows taken into account; a value of 0
 * means "no bound" on that side. The returned fragment ends with a comma
 * because another CTE is expected to follow.
 */
function dataBucketsQuery(ctx) {
    const rangeFilters = [];
    if (ctx.start !== 0) {
        rangeFilters.push(`${ctx.column} >= to_timestamp(${ctx.start})`);
    }
    if (ctx.end !== 0) {
        rangeFilters.push(`${ctx.column} <= to_timestamp(${ctx.end})`);
    }
    const whereClause = rangeFilters.length === 0 ? '' : `WHERE ${rangeFilters.join(' and ')}`;

    return [
        '',
        '__wd_buckets AS',
        '(',
        'SELECT',
        `date_trunc('${ctx.aggregation}', timezone(__wd_tz.name, ${ctx.column}::timestamptz)) as timestamp,`,
        'count(*) as freq,',
        `${utils.countNULLs(ctx)} as nulls_count`,
        'FROM',
        '(',
        `${ctx.query}`,
        ') __source, __wd_tz',
        whereClause,
        'GROUP BY timestamp, __wd_tz.name',
        '),'
    ].join('\n');
}
/**
 * Builds the `__wd_all_buckets` CTE: an array (`bins`) holding every possible
 * bin between the start and the end of the histogram, one per ctx.interval.
 *
 * A bound given as 0 is considered "not provided": it is then taken from the
 * min / max timestamp found in `__wd_buckets`, which has to be joined in.
 * Otherwise the bound is the provided epoch truncated to the aggregation in
 * the timezone selected in `__wd_tz`.
 */
function allBucketsArrayQuery(ctx) {
    const truncatedBound = epoch =>
        `date_trunc('${ctx.aggregation}', timezone(__wd_tz.name, to_timestamp(${epoch})))`;

    const seriesStart = ctx.start === 0 ? `min(__wd_buckets.timestamp)` : truncatedBound(ctx.start);
    const seriesEnd = ctx.end === 0 ? `max(__wd_buckets.timestamp)` : truncatedBound(ctx.end);
    // The data buckets are only needed when at least one bound comes from them
    const bucketsJoin = (ctx.start === 0 || ctx.end === 0) ? `, __wd_buckets GROUP BY __wd_tz.name` : ``;

    return [
        '',
        '__wd_all_buckets AS',
        '(',
        'SELECT ARRAY(',
        'SELECT',
        'generate_series(',
        `${seriesStart},`,
        `${seriesEnd},`,
        `interval '${ctx.interval}') as bin_start`,
        `FROM __wd_tz${bucketsJoin}`,
        ') as bins',
        ')'
    ].join('\n');
}
const dateIntervalQueryTpl = ctx => ` const dateIntervalQueryTpl = ctx => `
WITH WITH
@ -41,107 +138,6 @@ const dateIntervalQueryTpl = ctx => `
FROM __cdb_interval_in_days, __cdb_interval_in_hours, __cdb_interval_in_minutes, __cdb_interval_in_seconds FROM __cdb_interval_in_days, __cdb_interval_in_hours, __cdb_interval_in_minutes, __cdb_interval_in_seconds
`; `;
const nullsQueryTpl = ctx => `
__cdb_nulls AS (
SELECT
count(*) AS __cdb_nulls_count
FROM (${ctx.query}) __cdb_histogram_nulls
WHERE ${ctx.column} IS NULL
)
`;
const dateBasicsQueryTpl = ctx => `
__cdb_basics AS (
SELECT
max(date_part('epoch', ${ctx.column})) AS __cdb_max_val,
min(date_part('epoch', ${ctx.column})) AS __cdb_min_val,
avg(date_part('epoch', ${ctx.column})) AS __cdb_avg_val,
min(
date_trunc(
'${ctx.aggregation}', ${ctx.column}::timestamp AT TIME ZONE '${ctx.offset}'
)
) AS __cdb_start_date,
max(${ctx.column}::timestamp AT TIME ZONE '${ctx.offset}') AS __cdb_end_date,
count(1) AS __cdb_total_rows
FROM (${ctx.query}) __cdb_basics_query
)
`;
const dateOverrideBasicsQueryTpl = ctx => `
__cdb_basics AS (
SELECT
max(${ctx.end})::float AS __cdb_max_val,
min(${ctx.start})::float AS __cdb_min_val,
avg(date_part('epoch', ${ctx.column})) AS __cdb_avg_val,
min(
date_trunc(
'${ctx.aggregation}',
TO_TIMESTAMP(${ctx.start})::timestamp AT TIME ZONE '${ctx.offset}'
)
) AS __cdb_start_date,
max(
TO_TIMESTAMP(${ctx.end})::timestamp AT TIME ZONE '${ctx.offset}'
) AS __cdb_end_date,
count(1) AS __cdb_total_rows
FROM (${ctx.query}) __cdb_basics_query
)
`;
const dateBinsQueryTpl = ctx => `
__cdb_bins AS (
SELECT
__cdb_bins_array,
ARRAY_LENGTH(__cdb_bins_array, 1) AS __cdb_bins_number
FROM (
SELECT
ARRAY(
SELECT GENERATE_SERIES(
__cdb_start_date::timestamptz,
__cdb_end_date::timestamptz,
${ctx.aggregation === 'quarter' ? `'3 month'::interval` : `'1 ${ctx.aggregation}'::interval`}
)
) AS __cdb_bins_array
FROM __cdb_basics
) __cdb_bins_array_query
)
`;
const dateHistogramQueryTpl = ctx => `
SELECT
(__cdb_max_val - __cdb_min_val) / cast(__cdb_bins_number as float) AS bin_width,
__cdb_bins_number AS bins_number,
__cdb_nulls_count AS nulls_count,
CASE WHEN __cdb_min_val = __cdb_max_val
THEN 0
ELSE GREATEST(
1,
LEAST(
WIDTH_BUCKET(
${ctx.column}::timestamp AT TIME ZONE '${ctx.offset}',
__cdb_bins_array
),
__cdb_bins_number
)
) - 1
END AS bin,
min(
date_part(
'epoch',
date_trunc(
'${ctx.aggregation}', ${ctx.column}::timestamp AT TIME ZONE '${ctx.offset}'
) AT TIME ZONE '${ctx.offset}'
)
)::numeric AS timestamp,
date_part('epoch', __cdb_start_date)::numeric AS timestamp_start,
min(date_part('epoch', ${ctx.column}))::numeric AS min,
max(date_part('epoch', ${ctx.column}))::numeric AS max,
avg(date_part('epoch', ${ctx.column}))::numeric AS avg,
count(*) AS freq
FROM (${ctx.query}) __cdb_histogram, __cdb_basics, __cdb_bins, __cdb_nulls
WHERE date_part('epoch', ${ctx.column}) IS NOT NULL
GROUP BY bin, bins_number, bin_width, nulls_count, timestamp_start
ORDER BY bin
`;
const MAX_INTERVAL_VALUE = 366; const MAX_INTERVAL_VALUE = 366;
@ -176,12 +172,21 @@ module.exports = class DateHistogram extends BaseHistogram {
_buildQueryTpl (ctx) { _buildQueryTpl (ctx) {
return ` return `
WITH ${offsetNameQueryTpl(ctx)}
${this._hasOverridenRange(ctx.override) ? dateOverrideBasicsQueryTpl(ctx) : dateBasicsQueryTpl(ctx)}, ${dataBucketsQuery(ctx)}
${dateBinsQueryTpl(ctx)}, ${allBucketsArrayQuery(ctx)}
${nullsQueryTpl(ctx)} SELECT
${dateHistogramQueryTpl(ctx)} array_position(__wd_all_buckets.bins, __wd_buckets.timestamp) - 1 as bin,
`; date_part('epoch', timezone(__wd_tz.name, __wd_buckets.timestamp)) AS timestamp,
__wd_buckets.freq as freq,
date_part('epoch', timezone(__wd_tz.name, (__wd_all_buckets.bins)[1])) as timestamp_start,
array_length(__wd_all_buckets.bins, 1) as bins_number,
date_part('epoch', interval '${ctx.interval}') as bin_width,
__wd_buckets.nulls_count as nulls_count
FROM __wd_buckets, __wd_all_buckets, __wd_tz
GROUP BY __wd_tz.name, __wd_all_buckets.bins, __wd_buckets.timestamp, __wd_buckets.nulls_count, __wd_buckets.freq
ORDER BY bin ASC;
`;
} }
_buildQuery (psql, override, callback) { _buildQuery (psql, override, callback) {
@ -204,6 +209,9 @@ module.exports = class DateHistogram extends BaseHistogram {
return null; return null;
} }
var interval = this._getAggregation(override) === 'quarter' ?
'3 months' : '1 ' + this._getAggregation(override);
const histogramSql = this._buildQueryTpl({ const histogramSql = this._buildQueryTpl({
override: override, override: override,
query: this.query, query: this.query,
@ -211,7 +219,8 @@ module.exports = class DateHistogram extends BaseHistogram {
aggregation: this._getAggregation(override), aggregation: this._getAggregation(override),
start: this._getBinStart(override), start: this._getBinStart(override),
end: this._getBinEnd(override), end: this._getBinEnd(override),
offset: this._parseOffset(override) offset: this._parseOffset(override),
interval: interval
}); });
debug(histogramSql); debug(histogramSql);
@ -264,8 +273,8 @@ module.exports = class DateHistogram extends BaseHistogram {
offset: this._getOffset(override), offset: this._getOffset(override),
timestamp_start: firstRow.timestamp_start, timestamp_start: firstRow.timestamp_start,
bin_width: firstRow.bin_width, bin_width: firstRow.bin_width || 0,
bins_count: firstRow.bins_number, bins_count: firstRow.bins_number || 0,
bins_start: firstRow.timestamp, bins_start: firstRow.timestamp,
nulls: firstRow.nulls_count, nulls: firstRow.nulls_count,
infinities: firstRow.infinities_count, infinities: firstRow.infinities_count,
@ -275,6 +284,10 @@ module.exports = class DateHistogram extends BaseHistogram {
} }
_getBuckets (result) { _getBuckets (result) {
result.rows.forEach(function(row) {
row.min = row.max = row.avg = row.timestamp;
});
return result.rows.map(({ bin, min, max, avg, freq, timestamp }) => ({ bin, min, max, avg, freq, timestamp })); return result.rows.map(({ bin, min, max, avg, freq, timestamp }) => ({ bin, min, max, avg, freq, timestamp }));
} }

View File

@ -1,44 +1,25 @@
const BaseHistogram = require('./base-histogram'); const BaseHistogram = require('./base-histogram');
const debug = require('debug')('windshaft:dataview:numeric-histogram'); const debug = require('debug')('windshaft:dataview:numeric-histogram');
const utils = require('../../../utils/query-utils');
const columnCastTpl = ctx => `date_part('epoch', ${ctx.column})`; /** Query to get min and max values from the query */
const irqQueryTpl = ctx => `
const filterOutSpecialNumericValues = ctx => `
${ctx.column} != 'infinity'::float
AND
${ctx.column} != '-infinity'::float
AND
${ctx.column} != 'NaN'::float
`;
const filteredQueryTpl = ctx => `
__cdb_filtered_source AS ( __cdb_filtered_source AS (
SELECT * SELECT *
FROM (${ctx.query}) __cdb_filtered_source_query FROM (${ctx.query}) __cdb_filtered_source_query
WHERE ${ctx.column} IS NOT NULL WHERE ${utils.handleFloatColumn(ctx)} IS NOT NULL
${ctx.isFloatColumn ? `AND ${filterOutSpecialNumericValues(ctx)}` : ''} ),
)
`;
const basicsQueryTpl = ctx => `
__cdb_basics AS ( __cdb_basics AS (
SELECT SELECT
max(${ctx.column}) AS __cdb_max_val, min(${ctx.column}) AS __cdb_min_val, max(${ctx.column}) AS __cdb_max_val,
avg(${ctx.column}) AS __cdb_avg_val, count(1) AS __cdb_total_rows min(${ctx.column}) AS __cdb_min_val,
count(1) AS __cdb_total_rows
FROM __cdb_filtered_source FROM __cdb_filtered_source
) )
`; `;
const overrideBasicsQueryTpl = ctx => ` /* Query to calculate the number of bins (needs irqQueryTpl before it*/
__cdb_basics AS ( const binsQueryTpl = ctx => `
SELECT
max(${ctx.end}) AS __cdb_max_val, min(${ctx.start}) AS __cdb_min_val,
avg(${ctx.column}) AS __cdb_avg_val, count(1) AS __cdb_total_rows
FROM __cdb_filtered_source
)
`;
const iqrQueryTpl = ctx => `
__cdb_iqrange AS ( __cdb_iqrange AS (
SELECT max(quartile_max) - min(quartile_max) AS __cdb_iqr SELECT max(quartile_max) - min(quartile_max) AS __cdb_iqr
FROM ( FROM (
@ -49,10 +30,7 @@ const iqrQueryTpl = ctx => `
WHERE quartile = 1 or quartile = 3 WHERE quartile = 1 or quartile = 3
GROUP BY quartile GROUP BY quartile
) __cdb_iqr ) __cdb_iqr
) ),
`;
const binsQueryTpl = ctx => `
__cdb_bins AS ( __cdb_bins AS (
SELECT SELECT
CASE WHEN __cdb_total_rows = 0 OR __cdb_iqr = 0 CASE WHEN __cdb_total_rows = 0 OR __cdb_iqr = 0
@ -70,83 +48,6 @@ const binsQueryTpl = ctx => `
) )
`; `;
const overrideBinsQueryTpl = ctx => `
__cdb_bins AS (
SELECT ${ctx.override.bins} AS __cdb_bins_number
)
`;
const nullsQueryTpl = ctx => `
__cdb_nulls AS (
SELECT
count(*) AS __cdb_nulls_count
FROM (${ctx.query}) __cdb_histogram_nulls
WHERE ${ctx.column} IS NULL
)
`;
const infinitiesQueryTpl = ctx => `
__cdb_infinities AS (
SELECT
count(*) AS __cdb_infinities_count
FROM (${ctx.query}) __cdb_infinities_query
WHERE
${ctx.column} = 'infinity'::float
OR
${ctx.column} = '-infinity'::float
)
`;
const nansQueryTpl = ctx => `
__cdb_nans AS (
SELECT
count(*) AS __cdb_nans_count
FROM (${ctx.query}) __cdb_nans_query
WHERE ${ctx.column} = 'NaN'::float
)
`;
const specialNumericValuesColumnDefinitionTpl = () => `
__cdb_infinities_count AS infinities_count,
__cdb_nans_count AS nans_count
`;
const specialNumericValuesCTETpl = () => `
__cdb_infinities, __cdb_nans
`;
const specialNumericValuesColumnTpl = () => `
infinities_count, nans_count
`;
const histogramQueryTpl = ctx => `
SELECT
(__cdb_max_val - __cdb_min_val) / cast(__cdb_bins_number as float) AS bin_width,
__cdb_bins_number AS bins_number,
__cdb_nulls_count AS nulls_count,
${ctx.isFloatColumn ? `${specialNumericValuesColumnDefinitionTpl()},` : ''}
__cdb_avg_val AS avg_val,
CASE WHEN __cdb_min_val = __cdb_max_val
THEN 0
ELSE GREATEST(
1,
LEAST(
WIDTH_BUCKET(${ctx.column}, __cdb_min_val, __cdb_max_val, __cdb_bins_number),
__cdb_bins_number
)
) - 1
END AS bin,
min(${ctx.column})::numeric AS min,
max(${ctx.column})::numeric AS max,
avg(${ctx.column})::numeric AS avg,
count(*) AS freq
FROM __cdb_filtered_source, __cdb_basics, __cdb_nulls, __cdb_bins
${ctx.isFloatColumn ? `, ${specialNumericValuesCTETpl()}` : ''}
GROUP BY bin, bins_number, bin_width, nulls_count, avg_val
${ctx.isFloatColumn ? `, ${specialNumericValuesColumnTpl()}` : ''}
ORDER BY bin
`;
const BIN_MIN_NUMBER = 6; const BIN_MIN_NUMBER = 6;
const BIN_MAX_NUMBER = 48; const BIN_MAX_NUMBER = 48;
@ -167,14 +68,14 @@ module.exports = class NumericHistogram extends BaseHistogram {
_buildQuery (psql, override, callback) { _buildQuery (psql, override, callback) {
const histogramSql = this._buildQueryTpl({ const histogramSql = this._buildQueryTpl({
override: override, column: this._columnType === 'date' ? utils.columnCastTpl({ column: this.column }) : this.column,
column: this._columnType === 'date' ? columnCastTpl({ column: this.column }) : this.column,
isFloatColumn: this._columnType === 'float', isFloatColumn: this._columnType === 'float',
query: this.query, query: this.query,
start: this._getBinStart(override), start: this._getBinStart(override),
end: this._getBinEnd(override), end: this._getBinEnd(override),
bins: this._getBinsCount(override),
minBins: BIN_MIN_NUMBER, minBins: BIN_MIN_NUMBER,
maxBins: BIN_MAX_NUMBER, maxBins: BIN_MAX_NUMBER
}); });
debug(histogramSql); debug(histogramSql);
@ -182,19 +83,62 @@ module.exports = class NumericHistogram extends BaseHistogram {
return callback(null, histogramSql); return callback(null, histogramSql);
} }
/**
* ctx: Object with the following values
* ctx.column -- Column for the histogram
* ctx.isFloatColumn - Whether the column is float or not
* ctx.query -- Subquery to extract data
* ctx.start -- Start value for the bins. [>= end to force calculation]
* ctx.end -- End value for the bins.
* ctx.bins -- Numbers of bins to generate [<0 to force calculation]
* ctx.minBins - If !full min bins to calculate [Optional]
* ctx.maxBins - If !full max bins to calculate [Optional]
*/
_buildQueryTpl (ctx) { _buildQueryTpl (ctx) {
var extra_tables = ``;
var extra_queries = ``;
var extra_groupby = ``;
if (ctx.start >= ctx.end) {
ctx.end = `__cdb_basics.__cdb_max_val`;
ctx.start = `__cdb_basics.__cdb_min_val`;
extra_groupby = `, __cdb_basics.__cdb_max_val, __cdb_basics.__cdb_min_val`;
extra_tables = `, __cdb_basics`;
extra_queries = `WITH ${irqQueryTpl(ctx)}`;
}
if (ctx.bins <= 0) {
ctx.bins = `__cdb_bins.__cdb_bins_number`;
extra_groupby += `, __cdb_bins.__cdb_bins_number`;
extra_tables += `, __cdb_bins`;
extra_queries = `WITH ${irqQueryTpl(ctx)}, ${binsQueryTpl(ctx)}`;
}
return ` return `
WITH ${extra_queries}
${filteredQueryTpl(ctx)}, SELECT
${this._hasOverridenRange(ctx.override) ? overrideBasicsQueryTpl(ctx) : basicsQueryTpl(ctx)}, (${ctx.end} - ${ctx.start}) / ${ctx.bins}::float AS bin_width,
${this._hasOverridenBins(ctx.override) ? ${ctx.bins} as bins_number,
overrideBinsQueryTpl(ctx) : ${utils.countNULLs(ctx)} AS nulls_count,
`${iqrQueryTpl(ctx)}, ${binsQueryTpl(ctx)}` ${utils.countInfinites(ctx)} AS infinities_count,
}, ${utils.countNaNs(ctx)} AS nans_count,
${nullsQueryTpl(ctx)} min(${utils.handleFloatColumn(ctx)}) AS min,
${ctx.isFloatColumn ? `,${infinitiesQueryTpl(ctx)}, ${nansQueryTpl(ctx)}` : ''} max(${utils.handleFloatColumn(ctx)}) AS max,
${histogramQueryTpl(ctx)} avg(${utils.handleFloatColumn(ctx)}) AS avg,
`; sum(CASE WHEN (${utils.handleFloatColumn(ctx)} is not NULL) THEN 1 ELSE 0 END) as freq,
CASE WHEN ${ctx.start} = ${ctx.end}
THEN 0
ELSE GREATEST(1, LEAST(
${ctx.bins},
WIDTH_BUCKET(${utils.handleFloatColumn(ctx)}, ${ctx.start}, ${ctx.end}, ${ctx.bins}))) - 1
END AS bin
FROM
(
${ctx.query}
) __cdb_filtered_source_query${extra_tables}
GROUP BY bin${extra_groupby}
ORDER BY bin;`;
} }
_hasOverridenBins (override) { _hasOverridenBins (override) {
@ -204,14 +148,31 @@ module.exports = class NumericHistogram extends BaseHistogram {
_getSummary (result, override) { _getSummary (result, override) {
const firstRow = result.rows[0] || {}; const firstRow = result.rows[0] || {};
var total_nulls = 0;
var total_infinities = 0;
var total_nans = 0;
var total_avg = 0;
var total_count = 0;
result.rows.forEach(function(row) {
total_nulls += row.nulls_count;
total_infinities += row.infinities_count;
total_nans += row.nans_count;
total_avg += row.avg * row.freq;
total_count += row.freq;
});
if (total_count !== 0) {
total_avg /= total_count;
}
return { return {
bin_width: firstRow.bin_width, bin_width: firstRow.bin_width,
bins_count: firstRow.bins_number, bins_count: firstRow.bins_number,
bins_start: this._populateBinStart(firstRow, override), bins_start: this._populateBinStart(firstRow, override),
nulls: firstRow.nulls_count, nulls: total_nulls,
infinities: firstRow.infinities_count, infinities: total_infinities,
nans: firstRow.nans_count, nans: total_nans,
avg: firstRow.avg_val, avg: total_avg
}; };
} }

View File

@ -1,55 +1,38 @@
var BaseOverviewsDataview = require('./base'); var BaseOverviewsDataview = require('./base');
var BaseDataview = require('../formula'); var BaseDataview = require('../formula');
var debug = require('debug')('windshaft:widget:formula:overview'); var debug = require('debug')('windshaft:widget:formula:overview');
const utils = require('../../../utils/query-utils');
var dot = require('dot'); var dot = require('dot');
dot.templateSettings.strip = false; dot.templateSettings.strip = false;
var formulaQueryTpls = { const VALID_OPERATIONS = {
'count': dot.template([ count: true,
'SELECT', sum: true,
'sum(_feature_count) AS result,', avg: true
'(SELECT count(1) FROM ({{=it._query}}) _cdb_formula_nulls WHERE {{=it._column}} IS NULL) AS nulls_count',
'{{?it._isFloatColumn}},(SELECT count(1) FROM ({{=it._query}}) _cdb_formula_infinities',
' WHERE {{=it._column}} = \'infinity\'::float OR {{=it._column}} = \'-infinity\'::float) AS infinities_count,',
'(SELECT count(1) FROM ({{=it._query}}) _cdb_formula_nans',
' WHERE {{=it._column}} = \'NaN\'::float) AS nans_count{{?}}',
'FROM ({{=it._query}}) _cdb_formula'
].join('\n')),
'sum': dot.template([
'SELECT',
'sum({{=it._column}}*_feature_count) AS result,',
'(SELECT count(1) FROM ({{=it._query}}) _cdb_formula_nulls WHERE {{=it._column}} IS NULL) AS nulls_count',
'{{?it._isFloatColumn}},(SELECT count(1) FROM ({{=it._query}}) _cdb_formula_infinities',
' WHERE {{=it._column}} = \'infinity\'::float OR {{=it._column}} = \'-infinity\'::float) AS infinities_count',
',(SELECT count(1) FROM ({{=it._query}}) _cdb_formula_nans',
' WHERE {{=it._column}} = \'NaN\'::float) AS nans_count{{?}}',
'FROM ({{=it._query}}) _cdb_formula',
'{{?it._isFloatColumn}}WHERE',
' {{=it._column}} != \'infinity\'::float',
'AND',
' {{=it._column}} != \'-infinity\'::float',
'AND',
' {{=it._column}} != \'NaN\'::float{{?}}'
].join('\n')),
'avg': dot.template([
'SELECT',
'sum({{=it._column}}*_feature_count)/sum(_feature_count) AS result,',
'(SELECT count(1) FROM ({{=it._query}}) _cdb_formula_nulls WHERE {{=it._column}} IS NULL) AS nulls_count',
'{{?it._isFloatColumn}},(SELECT count(1) FROM ({{=it._query}}) _cdb_formula_infinities',
' WHERE {{=it._column}} = \'infinity\'::float OR {{=it._column}} = \'-infinity\'::float) AS infinities_count',
',(SELECT count(1) FROM ({{=it._query}}) _cdb_formula_nans',
' WHERE {{=it._column}} = \'NaN\'::float) AS nans_count{{?}}',
'FROM ({{=it._query}}) _cdb_formula',
'{{?it._isFloatColumn}}WHERE',
' {{=it._column}} != \'infinity\'::float',
'AND',
' {{=it._column}} != \'-infinity\'::float',
'AND',
' {{=it._column}} != \'NaN\'::float{{?}}'
].join('\n')),
}; };
/**
 * Returns the SQL expression that computes the formula result on top of an
 * overview table, where each row stands for `_feature_count` features:
 *  - count: adds up the feature counts (the column is not involved)
 *  - sum:   adds up the column weighted by the feature count
 *  - avg:   weighted sum divided by the total feature count
 * Any other operation is applied to the column as is.
 */
function dataviewResult(ctx) {
    if (ctx.operation === 'count') {
        return `sum(_feature_count)`;
    }

    const value = utils.handleFloatColumn(ctx);
    if (ctx.operation === 'sum') {
        return `sum(${value}*_feature_count)`;
    }
    if (ctx.operation === 'avg') {
        return `sum(${value}*_feature_count)/sum(_feature_count) `;
    }
    return `${ctx.operation}(${value})`;
}
/**
 * Builds the formula query for overviews: the operation result plus the
 * number of NULLs and, only for float columns, the number of infinities and
 * NaNs found in the column.
 */
const formulaQueryTpl = ctx => {
    const selectList = [
        `${dataviewResult(ctx)} AS result,`,
        `${utils.countNULLs(ctx)} AS nulls_count`
    ];

    if (ctx.isFloatColumn) {
        selectList.push(`,${utils.countInfinites(ctx)} AS infinities_count,`);
        selectList.push(`${utils.countNaNs(ctx)} AS nans_count`);
    } else {
        // Non float columns leave both lines empty
        selectList.push(``, ``);
    }

    return ['SELECT', ...selectList, `FROM (${ctx.query}) __cdb_formula`].join('\n');
};
function Formula(query, options, queryRewriter, queryRewriteData, params, queries) { function Formula(query, options, queryRewriter, queryRewriteData, params, queries) {
BaseOverviewsDataview.call(this, query, options, BaseDataview, queryRewriter, queryRewriteData, params, queries); BaseOverviewsDataview.call(this, query, options, BaseDataview, queryRewriter, queryRewriteData, params, queries);
this.column = options.column || '1'; this.column = options.column || '1';
@ -65,36 +48,31 @@ module.exports = Formula;
Formula.prototype.sql = function (psql, override, callback) { Formula.prototype.sql = function (psql, override, callback) {
var self = this; var self = this;
var formulaQueryTpl = formulaQueryTpls[this.operation]; if (!VALID_OPERATIONS[this.operation]) {
return this.defaultSql(psql, override, callback);
if (formulaQueryTpl) {
// supported formula for use with overviews
if (this._isFloatColumn === null) {
this._isFloatColumn = false;
this.getColumnType(psql, this.column, this.queries.no_filters, function (err, type) {
if (!err && !!type) {
self._isFloatColumn = type.float;
}
self.sql(psql, override, callback);
});
return null;
}
var formulaSql = formulaQueryTpl({
_isFloatColumn: this._isFloatColumn,
_query: this.rewrittenQuery(this.query),
_operation: this.operation,
_column: this.column
});
callback = callback || override;
debug(formulaSql);
return callback(null, formulaSql);
} }
if (this._isFloatColumn === null) {
this._isFloatColumn = false;
this.getColumnType(psql, this.column, this.queries.no_filters, function (err, type) {
if (!err && !!type) {
self._isFloatColumn = type.float;
}
self.sql(psql, override, callback);
});
return null;
}
// default behaviour var formulaSql = formulaQueryTpl({
return this.defaultSql(psql, override, callback); isFloatColumn: this._isFloatColumn,
query: this.rewrittenQuery(this.query),
operation: this.operation,
column: this.column
});
callback = callback || override;
debug(formulaSql);
return callback(null, formulaSql);
}; };

View File

@ -40,3 +40,34 @@ module.exports.getAggregationMetadata = ctx => `
geom_type AS type geom_type AS type
FROM rowEstimation, geometryType; FROM rowEstimation, geometryType;
`; `;
/** SQL expression that converts the column into its epoch value */
module.exports.columnCastTpl = function columnCastTpl(ctx) {
    const { column } = ctx;
    return `date_part('epoch', ${column})`;
};
/**
 * SQL expression for the column value. For float columns the non numeric
 * values (infinity, -infinity and NaN) are turned into NULL; any other column
 * is used as is.
 */
module.exports.handleFloatColumn = function handleFloatColumn(ctx) {
    if (!ctx.isFloatColumn) {
        return `${ctx.column}`;
    }
    return `nullif(nullif(nullif(${ctx.column}, 'infinity'::float), '-infinity'::float), 'NaN'::float)`;
};
/** SQL aggregate that adds 1 for every row where the column IS NULL */
module.exports.countNULLs = function countNULLs(ctx) {
    const { column } = ctx;
    return `sum(CASE WHEN (${column} IS NULL) THEN 1 ELSE 0 END)`;
};
/**
 * SQL aggregate that adds 1 for every row where the column is infinity
 * (positive or negative). Columns that are not float yield the constant 0.
 */
module.exports.countInfinites = function countInfinites(ctx) {
    if (!ctx.isFloatColumn) {
        return `0`;
    }
    const { column } = ctx;
    return `sum(CASE WHEN (${column} = 'infinity'::float OR ${column} = '-infinity'::float) THEN 1 ELSE 0 END)`;
};
/**
 * SQL aggregate that adds 1 for every row where the column is NaN.
 * Columns that are not float yield the constant 0.
 */
module.exports.countNaNs = function countNaNs(ctx) {
    if (!ctx.isFloatColumn) {
        return `0`;
    }
    const { column } = ctx;
    return `sum(CASE WHEN (${column} = 'NaN'::float) THEN 1 ELSE 0 END)`;
};

View File

@ -1,7 +1,7 @@
{ {
"private": true, "private": true,
"name": "windshaft-cartodb", "name": "windshaft-cartodb",
"version": "4.2.1", "version": "4.3.1",
"description": "A map tile server for CartoDB", "description": "A map tile server for CartoDB",
"keywords": [ "keywords": [
"cartodb" "cartodb"

View File

@ -186,7 +186,7 @@ describe('histogram-dataview for date column type', function() {
}, },
minute_histogram: { minute_histogram: {
source: { source: {
id: 'minute-histogram-source' id: 'minute-histogram-source-tz'
}, },
type: 'histogram', type: 'histogram',
options: { options: {
@ -214,8 +214,8 @@ describe('histogram-dataview for date column type', function() {
"params": { "params": {
"query": [ "query": [
"select null::geometry the_geom_webmercator, date AS d", "select null::geometry the_geom_webmercator, date AS d",
"from generate_series(", "from generate_series('2007-02-15 01:00:00+00'::timestamptz,",
"'2007-02-15 01:00:00'::timestamptz, '2008-04-09 01:00:00'::timestamptz, '1 day'::interval", "'2008-04-09 01:00:00+00'::timestamptz, '1 day'::interval",
") date" ") date"
].join(' ') ].join(' ')
} }
@ -233,13 +233,13 @@ describe('histogram-dataview for date column type', function() {
} }
}, },
{ {
"id": "minute-histogram-source", "id": "minute-histogram-source-tz",
"type": "source", "type": "source",
"params": { "params": {
"query": [ "query": [
"select null::geometry the_geom_webmercator, date AS d", "select null::geometry the_geom_webmercator, date AS d",
"from generate_series(", "from generate_series('2007-02-15 23:50:00+00'::timestamptz,",
"'2007-02-15 23:50:00'::timestamp, '2007-02-16 00:10:00'::timestamp, '1 minute'::interval", "'2007-02-16 00:10:00+00'::timestamptz, '1 minute'::interval",
") date" ") date"
].join(' ') ].join(' ')
} }
@ -256,6 +256,7 @@ describe('histogram-dataview for date column type', function() {
}]; }];
dateHistogramsUseCases.forEach(function (test) { dateHistogramsUseCases.forEach(function (test) {
it('should create a date histogram aggregated in months (EDT) ' + test.desc, function (done) { it('should create a date histogram aggregated in months (EDT) ' + test.desc, function (done) {
var OFFSET_EDT_IN_MINUTES = -4 * 60; // EDT Eastern Daylight Time (GMT-4) in minutes var OFFSET_EDT_IN_MINUTES = -4 * 60; // EDT Eastern Daylight Time (GMT-4) in minutes
@ -323,7 +324,7 @@ describe('histogram-dataview for date column type', function() {
assert.ok(!err, err); assert.ok(!err, err);
assert.equal(dataview.type, 'histogram'); assert.equal(dataview.type, 'histogram');
assert.ok(dataview.bin_width > 0, 'Unexpected bin width: ' + dataview.bin_width); assert.ok(dataview.bin_width > 0, 'Unexpected bin width: ' + dataview.bin_width);
assert.equal(dataview.bins.length, 6); assert.equal(dataview.bins_count, 6);
dataview.bins.forEach(function (bin) { dataview.bins.forEach(function (bin) {
assert.ok(bin.min <= bin.max, 'bin min < bin max: ' + JSON.stringify(bin)); assert.ok(bin.min <= bin.max, 'bin min < bin max: ' + JSON.stringify(bin));
}); });
@ -335,7 +336,7 @@ describe('histogram-dataview for date column type', function() {
it('should cast overridden start and end to float to avoid out of range errors ' + test.desc, function (done) { it('should cast overridden start and end to float to avoid out of range errors ' + test.desc, function (done) {
var params = { var params = {
start: -2145916800, start: -2145916800,
end: 1009843199 end: 1193792400
}; };
this.testClient = new TestClient(mapConfig, 1234); this.testClient = new TestClient(mapConfig, 1234);
@ -348,27 +349,6 @@ describe('histogram-dataview for date column type', function() {
}); });
}); });
it('should return same histogram ' + test.desc, function (done) {
var params = {
start: 1171501200, // 2007-02-15 01:00:00 = min(date_colum)
end: 1207702800 // 2008-04-09 01:00:00 = max(date_colum)
};
this.testClient = new TestClient(mapConfig, 1234);
this.testClient.getDataview(test.dataviewId, {}, function (err, dataview) {
assert.ok(!err, err);
this.testClient = new TestClient(mapConfig, 1234);
this.testClient.getDataview(test.dataviewId, params, function (err, filteredDataview) {
assert.ok(!err, err);
assert.deepEqual(dataview, filteredDataview);
done();
});
});
});
it('should aggregate histogram overriding default offset to CEST ' + test.desc, function (done) { it('should aggregate histogram overriding default offset to CEST ' + test.desc, function (done) {
var OFFSET_CEST_IN_SECONDS = 2 * 3600; // Central European Summer Time (Daylight Saving Time) var OFFSET_CEST_IN_SECONDS = 2 * 3600; // Central European Summer Time (Daylight Saving Time)
var OFFSET_CEST_IN_MINUTES = 2 * 60; // Central European Summer Time (Daylight Saving Time) var OFFSET_CEST_IN_MINUTES = 2 * 60; // Central European Summer Time (Daylight Saving Time)
@ -533,6 +513,26 @@ describe('histogram-dataview for date column type', function() {
}); });
}); });
it('should return same histogram ', function (done) {
var params = {
start: 1171501200, // 2007-02-15 01:00:00 = min(date_colum)
end: 1207702800 // 2008-04-09 01:00:00 = max(date_colum)
};
this.testClient = new TestClient(mapConfig, 1234);
this.testClient.getDataview('datetime_histogram_tz', {}, function (err, dataview) {
assert.ok(!err, err);
this.testClient = new TestClient(mapConfig, 1234);
this.testClient.getDataview('datetime_histogram_tz', params, function (err, filteredDataview) {
assert.ok(!err, err);
assert.deepEqual(dataview, filteredDataview);
done();
});
});
});
it('should find the best aggregation (automatic mode) to build the histogram', function (done) { it('should find the best aggregation (automatic mode) to build the histogram', function (done) {
var params = {}; var params = {};
this.testClient = new TestClient(mapConfig, 1234); this.testClient = new TestClient(mapConfig, 1234);
@ -640,7 +640,7 @@ describe('histogram-dataview for date column type', function() {
var dataviewWithDailyAggFixture = { var dataviewWithDailyAggFixture = {
aggregation: 'day', aggregation: 'day',
bin_width: 600, bin_width: 86400,
bins_count: 2, bins_count: 2,
bins_start: 1171497600, bins_start: 1171497600,
timestamp_start: 1171497600, timestamp_start: 1171497600,
@ -650,17 +650,17 @@ describe('histogram-dataview for date column type', function() {
[{ [{
bin: 0, bin: 0,
timestamp: 1171497600, timestamp: 1171497600,
min: 1171583400, min: 1171497600,
max: 1171583940, max: 1171497600,
avg: 1171583670, avg: 1171497600,
freq: 10 freq: 10
}, },
{ {
bin: 1, bin: 1,
timestamp: 1171584000, timestamp: 1171584000,
min: 1171584000, min: 1171584000,
max: 1171584600, max: 1171584000,
avg: 1171584300, avg: 1171584000,
freq: 11 freq: 11
}], }],
type: 'histogram' type: 'histogram'
@ -687,19 +687,19 @@ describe('histogram-dataview for date column type', function() {
var dataviewWithDailyAggAndOffsetFixture = { var dataviewWithDailyAggAndOffsetFixture = {
aggregation: 'day', aggregation: 'day',
bin_width: 1200, bin_width: 86400,
bins_count: 1, bins_count: 1,
bins_start: 1171501200, bins_start: 1171501200,
timestamp_start: 1171497600, timestamp_start: 1171501200,
nulls: 0, nulls: 0,
offset: -3600, offset: -3600,
bins: bins:
[{ [{
bin: 0, bin: 0,
timestamp: 1171501200, timestamp: 1171501200,
min: 1171583400, min: 1171501200,
max: 1171584600, max: 1171501200,
avg: 1171584000, avg: 1171501200,
freq: 21 freq: 21
}], }],
type: 'histogram' type: 'histogram'