Merge branch 'master' into mapconfig-aggregation
This commit is contained in:
commit
006dd86614
14
NEWS.md
14
NEWS.md
@ -1,8 +1,20 @@
|
||||
# Changelog
|
||||
|
||||
## 4.2.1
|
||||
## 4.3.1
|
||||
Released 2017-mm-dd
|
||||
|
||||
Announcements:
|
||||
|
||||
## 4.3.0
|
||||
Released 2017-12-11
|
||||
|
||||
Announcements:
|
||||
- Optimize Formula queries.
|
||||
- Optimize Formula queries in overviews.
|
||||
- Optimize Numeric Histogram queries.
|
||||
- Optimize Date Histogram queries.
|
||||
- Date Histograms: Now returns the same value for max/min/avg/timestamp per bin.
|
||||
- Date Histograms: Now it should return the same no matter the DB/Client time zone.
|
||||
|
||||
## 4.2.0
|
||||
Released 2017-12-04
|
||||
|
@ -1,34 +1,14 @@
|
||||
const BaseDataview = require('./base');
|
||||
const debug = require('debug')('windshaft:dataview:formula');
|
||||
const utils = require('../../utils/query-utils');
|
||||
|
||||
const countInfinitiesQueryTpl = ctx => `
|
||||
SELECT count(1) FROM (${ctx.query}) __cdb_formula_infinities
|
||||
WHERE ${ctx.column} = 'infinity'::float OR ${ctx.column} = '-infinity'::float
|
||||
`;
|
||||
|
||||
const countNansQueryTpl = ctx => `
|
||||
SELECT count(1) FROM (${ctx.query}) __cdb_formula_nans
|
||||
WHERE ${ctx.column} = 'NaN'::float
|
||||
`;
|
||||
|
||||
const filterOutSpecialNumericValuesTpl = ctx => `
|
||||
WHERE
|
||||
${ctx.column} != 'infinity'::float
|
||||
AND
|
||||
${ctx.column} != '-infinity'::float
|
||||
AND
|
||||
${ctx.column} != 'NaN'::float
|
||||
`;
|
||||
|
||||
const formulaQueryTpl = ctx => `
|
||||
SELECT
|
||||
${ctx.operation}(${ctx.column}) AS result,
|
||||
(SELECT count(1) FROM (${ctx.query}) _cdb_formula_nulls WHERE ${ctx.column} IS NULL) AS nulls_count
|
||||
${ctx.isFloatColumn ? `,(${countInfinitiesQueryTpl(ctx)}) AS infinities_count` : ''}
|
||||
${ctx.isFloatColumn ? `,(${countNansQueryTpl(ctx)}) AS nans_count` : ''}
|
||||
FROM (${ctx.query}) __cdb_formula
|
||||
${ctx.isFloatColumn && ctx.operation !== 'count' ? `${filterOutSpecialNumericValuesTpl(ctx)}` : ''}
|
||||
`;
|
||||
const formulaQueryTpl = ctx =>
|
||||
`SELECT
|
||||
${ctx.operation}(${utils.handleFloatColumn(ctx)}) AS result,
|
||||
${utils.countNULLs(ctx)} AS nulls_count
|
||||
${ctx.isFloatColumn ? `,${utils.countInfinites(ctx)} AS infinities_count,` : ``}
|
||||
${ctx.isFloatColumn ? `${utils.countNaNs(ctx)} AS nans_count` : ``}
|
||||
FROM (${ctx.query}) __cdb_formula`;
|
||||
|
||||
const VALID_OPERATIONS = {
|
||||
count: true,
|
||||
|
@ -1,5 +1,102 @@
|
||||
const BaseHistogram = require('./base-histogram');
|
||||
const debug = require('debug')('windshaft:dataview:date-histogram');
|
||||
const utils = require('../../../utils/query-utils');
|
||||
|
||||
/**
|
||||
* Gets the name of a timezone with the same offset as the required
|
||||
* using the pg_timezone_names table. We do this because it's simpler to pass
|
||||
* the name than to pass the offset itself as PostgreSQL uses different
|
||||
* sign convention. For example: TIME ZONE 'CET' is equal to TIME ZONE 'UTC-1',
|
||||
* not 'UTC+1' which would be expected.
|
||||
* Gives priority to Etc/GMT±N timezones but still support odd offsets like 8.5
|
||||
* hours for Asia/Pyongyang.
|
||||
* It also makes it easier to, in the future, support the input of expected timezone
|
||||
* instead of the offset; that is using 'Europe/Madrid' instead of
|
||||
* '+3600' or '+7200'. The daylight saving status can be handled by postgres.
|
||||
*/
|
||||
const offsetNameQueryTpl = ctx => `
|
||||
WITH __wd_tz AS
|
||||
(
|
||||
SELECT name
|
||||
FROM pg_timezone_names
|
||||
WHERE utc_offset = interval '${ctx.offset} hours'
|
||||
ORDER BY CASE WHEN name LIKE 'Etc/GMT%' THEN 0 ELSE 1 END
|
||||
LIMIT 1
|
||||
),`;
|
||||
|
||||
/**
|
||||
* Function to get the subquery that places each row in its bin depending on
|
||||
* the aggregation. Since the data stored is in epoch we need to adapt it to
|
||||
* our timezone so when calling date_trunc it falls into the correct bin
|
||||
*/
|
||||
function dataBucketsQuery(ctx) {
|
||||
var condition_str = '';
|
||||
|
||||
if (ctx.start !== 0) {
|
||||
condition_str = `WHERE ${ctx.column} >= to_timestamp(${ctx.start})`;
|
||||
}
|
||||
if (ctx.end !== 0) {
|
||||
if (condition_str === '') {
|
||||
condition_str = `WHERE ${ctx.column} <= to_timestamp(${ctx.end})`;
|
||||
}
|
||||
else {
|
||||
condition_str += ` and ${ctx.column} <= to_timestamp(${ctx.end})`;
|
||||
}
|
||||
}
|
||||
|
||||
return `
|
||||
__wd_buckets AS
|
||||
(
|
||||
SELECT
|
||||
date_trunc('${ctx.aggregation}', timezone(__wd_tz.name, ${ctx.column}::timestamptz)) as timestamp,
|
||||
count(*) as freq,
|
||||
${utils.countNULLs(ctx)} as nulls_count
|
||||
FROM
|
||||
(
|
||||
${ctx.query}
|
||||
) __source, __wd_tz
|
||||
${condition_str}
|
||||
GROUP BY timestamp, __wd_tz.name
|
||||
),`;
|
||||
}
|
||||
|
||||
/**
|
||||
* Function that generates an array with all the possible bins between the
|
||||
* start and end date. If not provided we use the min and max generated from
|
||||
* the dataBucketsQuery
|
||||
*/
|
||||
function allBucketsArrayQuery(ctx) {
|
||||
var extra_from = ``;
|
||||
var series_start = ``;
|
||||
var series_end = ``;
|
||||
|
||||
if (ctx.start === 0) {
|
||||
extra_from = `, __wd_buckets GROUP BY __wd_tz.name`;
|
||||
series_start = `min(__wd_buckets.timestamp)`;
|
||||
} else {
|
||||
series_start = `date_trunc('${ctx.aggregation}', timezone(__wd_tz.name, to_timestamp(${ctx.start})))`;
|
||||
}
|
||||
|
||||
if (ctx.end === 0) {
|
||||
extra_from = `, __wd_buckets GROUP BY __wd_tz.name`;
|
||||
series_end = `max(__wd_buckets.timestamp)`;
|
||||
} else {
|
||||
series_end = `date_trunc('${ctx.aggregation}', timezone(__wd_tz.name, to_timestamp(${ctx.end})))`;
|
||||
}
|
||||
|
||||
return `
|
||||
__wd_all_buckets AS
|
||||
(
|
||||
SELECT ARRAY(
|
||||
SELECT
|
||||
generate_series(
|
||||
${series_start},
|
||||
${series_end},
|
||||
interval '${ctx.interval}') as bin_start
|
||||
FROM __wd_tz${extra_from}
|
||||
) as bins
|
||||
)`;
|
||||
}
|
||||
|
||||
const dateIntervalQueryTpl = ctx => `
|
||||
WITH
|
||||
@ -41,107 +138,6 @@ const dateIntervalQueryTpl = ctx => `
|
||||
FROM __cdb_interval_in_days, __cdb_interval_in_hours, __cdb_interval_in_minutes, __cdb_interval_in_seconds
|
||||
`;
|
||||
|
||||
const nullsQueryTpl = ctx => `
|
||||
__cdb_nulls AS (
|
||||
SELECT
|
||||
count(*) AS __cdb_nulls_count
|
||||
FROM (${ctx.query}) __cdb_histogram_nulls
|
||||
WHERE ${ctx.column} IS NULL
|
||||
)
|
||||
`;
|
||||
|
||||
const dateBasicsQueryTpl = ctx => `
|
||||
__cdb_basics AS (
|
||||
SELECT
|
||||
max(date_part('epoch', ${ctx.column})) AS __cdb_max_val,
|
||||
min(date_part('epoch', ${ctx.column})) AS __cdb_min_val,
|
||||
avg(date_part('epoch', ${ctx.column})) AS __cdb_avg_val,
|
||||
min(
|
||||
date_trunc(
|
||||
'${ctx.aggregation}', ${ctx.column}::timestamp AT TIME ZONE '${ctx.offset}'
|
||||
)
|
||||
) AS __cdb_start_date,
|
||||
max(${ctx.column}::timestamp AT TIME ZONE '${ctx.offset}') AS __cdb_end_date,
|
||||
count(1) AS __cdb_total_rows
|
||||
FROM (${ctx.query}) __cdb_basics_query
|
||||
)
|
||||
`;
|
||||
|
||||
const dateOverrideBasicsQueryTpl = ctx => `
|
||||
__cdb_basics AS (
|
||||
SELECT
|
||||
max(${ctx.end})::float AS __cdb_max_val,
|
||||
min(${ctx.start})::float AS __cdb_min_val,
|
||||
avg(date_part('epoch', ${ctx.column})) AS __cdb_avg_val,
|
||||
min(
|
||||
date_trunc(
|
||||
'${ctx.aggregation}',
|
||||
TO_TIMESTAMP(${ctx.start})::timestamp AT TIME ZONE '${ctx.offset}'
|
||||
)
|
||||
) AS __cdb_start_date,
|
||||
max(
|
||||
TO_TIMESTAMP(${ctx.end})::timestamp AT TIME ZONE '${ctx.offset}'
|
||||
) AS __cdb_end_date,
|
||||
count(1) AS __cdb_total_rows
|
||||
FROM (${ctx.query}) __cdb_basics_query
|
||||
)
|
||||
`;
|
||||
|
||||
const dateBinsQueryTpl = ctx => `
|
||||
__cdb_bins AS (
|
||||
SELECT
|
||||
__cdb_bins_array,
|
||||
ARRAY_LENGTH(__cdb_bins_array, 1) AS __cdb_bins_number
|
||||
FROM (
|
||||
SELECT
|
||||
ARRAY(
|
||||
SELECT GENERATE_SERIES(
|
||||
__cdb_start_date::timestamptz,
|
||||
__cdb_end_date::timestamptz,
|
||||
${ctx.aggregation === 'quarter' ? `'3 month'::interval` : `'1 ${ctx.aggregation}'::interval`}
|
||||
)
|
||||
) AS __cdb_bins_array
|
||||
FROM __cdb_basics
|
||||
) __cdb_bins_array_query
|
||||
)
|
||||
`;
|
||||
|
||||
const dateHistogramQueryTpl = ctx => `
|
||||
SELECT
|
||||
(__cdb_max_val - __cdb_min_val) / cast(__cdb_bins_number as float) AS bin_width,
|
||||
__cdb_bins_number AS bins_number,
|
||||
__cdb_nulls_count AS nulls_count,
|
||||
CASE WHEN __cdb_min_val = __cdb_max_val
|
||||
THEN 0
|
||||
ELSE GREATEST(
|
||||
1,
|
||||
LEAST(
|
||||
WIDTH_BUCKET(
|
||||
${ctx.column}::timestamp AT TIME ZONE '${ctx.offset}',
|
||||
__cdb_bins_array
|
||||
),
|
||||
__cdb_bins_number
|
||||
)
|
||||
) - 1
|
||||
END AS bin,
|
||||
min(
|
||||
date_part(
|
||||
'epoch',
|
||||
date_trunc(
|
||||
'${ctx.aggregation}', ${ctx.column}::timestamp AT TIME ZONE '${ctx.offset}'
|
||||
) AT TIME ZONE '${ctx.offset}'
|
||||
)
|
||||
)::numeric AS timestamp,
|
||||
date_part('epoch', __cdb_start_date)::numeric AS timestamp_start,
|
||||
min(date_part('epoch', ${ctx.column}))::numeric AS min,
|
||||
max(date_part('epoch', ${ctx.column}))::numeric AS max,
|
||||
avg(date_part('epoch', ${ctx.column}))::numeric AS avg,
|
||||
count(*) AS freq
|
||||
FROM (${ctx.query}) __cdb_histogram, __cdb_basics, __cdb_bins, __cdb_nulls
|
||||
WHERE date_part('epoch', ${ctx.column}) IS NOT NULL
|
||||
GROUP BY bin, bins_number, bin_width, nulls_count, timestamp_start
|
||||
ORDER BY bin
|
||||
`;
|
||||
|
||||
const MAX_INTERVAL_VALUE = 366;
|
||||
|
||||
@ -176,12 +172,21 @@ module.exports = class DateHistogram extends BaseHistogram {
|
||||
|
||||
_buildQueryTpl (ctx) {
|
||||
return `
|
||||
WITH
|
||||
${this._hasOverridenRange(ctx.override) ? dateOverrideBasicsQueryTpl(ctx) : dateBasicsQueryTpl(ctx)},
|
||||
${dateBinsQueryTpl(ctx)},
|
||||
${nullsQueryTpl(ctx)}
|
||||
${dateHistogramQueryTpl(ctx)}
|
||||
`;
|
||||
${offsetNameQueryTpl(ctx)}
|
||||
${dataBucketsQuery(ctx)}
|
||||
${allBucketsArrayQuery(ctx)}
|
||||
SELECT
|
||||
array_position(__wd_all_buckets.bins, __wd_buckets.timestamp) - 1 as bin,
|
||||
date_part('epoch', timezone(__wd_tz.name, __wd_buckets.timestamp)) AS timestamp,
|
||||
__wd_buckets.freq as freq,
|
||||
date_part('epoch', timezone(__wd_tz.name, (__wd_all_buckets.bins)[1])) as timestamp_start,
|
||||
array_length(__wd_all_buckets.bins, 1) as bins_number,
|
||||
date_part('epoch', interval '${ctx.interval}') as bin_width,
|
||||
__wd_buckets.nulls_count as nulls_count
|
||||
FROM __wd_buckets, __wd_all_buckets, __wd_tz
|
||||
GROUP BY __wd_tz.name, __wd_all_buckets.bins, __wd_buckets.timestamp, __wd_buckets.nulls_count, __wd_buckets.freq
|
||||
ORDER BY bin ASC;
|
||||
`;
|
||||
}
|
||||
|
||||
_buildQuery (psql, override, callback) {
|
||||
@ -204,6 +209,9 @@ module.exports = class DateHistogram extends BaseHistogram {
|
||||
return null;
|
||||
}
|
||||
|
||||
var interval = this._getAggregation(override) === 'quarter' ?
|
||||
'3 months' : '1 ' + this._getAggregation(override);
|
||||
|
||||
const histogramSql = this._buildQueryTpl({
|
||||
override: override,
|
||||
query: this.query,
|
||||
@ -211,7 +219,8 @@ module.exports = class DateHistogram extends BaseHistogram {
|
||||
aggregation: this._getAggregation(override),
|
||||
start: this._getBinStart(override),
|
||||
end: this._getBinEnd(override),
|
||||
offset: this._parseOffset(override)
|
||||
offset: this._parseOffset(override),
|
||||
interval: interval
|
||||
});
|
||||
|
||||
debug(histogramSql);
|
||||
@ -264,8 +273,8 @@ module.exports = class DateHistogram extends BaseHistogram {
|
||||
offset: this._getOffset(override),
|
||||
timestamp_start: firstRow.timestamp_start,
|
||||
|
||||
bin_width: firstRow.bin_width,
|
||||
bins_count: firstRow.bins_number,
|
||||
bin_width: firstRow.bin_width || 0,
|
||||
bins_count: firstRow.bins_number || 0,
|
||||
bins_start: firstRow.timestamp,
|
||||
nulls: firstRow.nulls_count,
|
||||
infinities: firstRow.infinities_count,
|
||||
@ -275,6 +284,10 @@ module.exports = class DateHistogram extends BaseHistogram {
|
||||
}
|
||||
|
||||
_getBuckets (result) {
|
||||
result.rows.forEach(function(row) {
|
||||
row.min = row.max = row.avg = row.timestamp;
|
||||
});
|
||||
|
||||
return result.rows.map(({ bin, min, max, avg, freq, timestamp }) => ({ bin, min, max, avg, freq, timestamp }));
|
||||
}
|
||||
|
||||
|
@ -1,44 +1,25 @@
|
||||
const BaseHistogram = require('./base-histogram');
|
||||
const debug = require('debug')('windshaft:dataview:numeric-histogram');
|
||||
const utils = require('../../../utils/query-utils');
|
||||
|
||||
const columnCastTpl = ctx => `date_part('epoch', ${ctx.column})`;
|
||||
|
||||
const filterOutSpecialNumericValues = ctx => `
|
||||
${ctx.column} != 'infinity'::float
|
||||
AND
|
||||
${ctx.column} != '-infinity'::float
|
||||
AND
|
||||
${ctx.column} != 'NaN'::float
|
||||
`;
|
||||
|
||||
const filteredQueryTpl = ctx => `
|
||||
/** Query to get min and max values from the query */
|
||||
const irqQueryTpl = ctx => `
|
||||
__cdb_filtered_source AS (
|
||||
SELECT *
|
||||
FROM (${ctx.query}) __cdb_filtered_source_query
|
||||
WHERE ${ctx.column} IS NOT NULL
|
||||
${ctx.isFloatColumn ? `AND ${filterOutSpecialNumericValues(ctx)}` : ''}
|
||||
)
|
||||
`;
|
||||
|
||||
const basicsQueryTpl = ctx => `
|
||||
WHERE ${utils.handleFloatColumn(ctx)} IS NOT NULL
|
||||
),
|
||||
__cdb_basics AS (
|
||||
SELECT
|
||||
max(${ctx.column}) AS __cdb_max_val, min(${ctx.column}) AS __cdb_min_val,
|
||||
avg(${ctx.column}) AS __cdb_avg_val, count(1) AS __cdb_total_rows
|
||||
max(${ctx.column}) AS __cdb_max_val,
|
||||
min(${ctx.column}) AS __cdb_min_val,
|
||||
count(1) AS __cdb_total_rows
|
||||
FROM __cdb_filtered_source
|
||||
)
|
||||
`;
|
||||
|
||||
const overrideBasicsQueryTpl = ctx => `
|
||||
__cdb_basics AS (
|
||||
SELECT
|
||||
max(${ctx.end}) AS __cdb_max_val, min(${ctx.start}) AS __cdb_min_val,
|
||||
avg(${ctx.column}) AS __cdb_avg_val, count(1) AS __cdb_total_rows
|
||||
FROM __cdb_filtered_source
|
||||
)
|
||||
`;
|
||||
|
||||
const iqrQueryTpl = ctx => `
|
||||
/* Query to calculate the number of bins (needs irqQueryTpl before it*/
|
||||
const binsQueryTpl = ctx => `
|
||||
__cdb_iqrange AS (
|
||||
SELECT max(quartile_max) - min(quartile_max) AS __cdb_iqr
|
||||
FROM (
|
||||
@ -49,10 +30,7 @@ const iqrQueryTpl = ctx => `
|
||||
WHERE quartile = 1 or quartile = 3
|
||||
GROUP BY quartile
|
||||
) __cdb_iqr
|
||||
)
|
||||
`;
|
||||
|
||||
const binsQueryTpl = ctx => `
|
||||
),
|
||||
__cdb_bins AS (
|
||||
SELECT
|
||||
CASE WHEN __cdb_total_rows = 0 OR __cdb_iqr = 0
|
||||
@ -70,83 +48,6 @@ const binsQueryTpl = ctx => `
|
||||
)
|
||||
`;
|
||||
|
||||
const overrideBinsQueryTpl = ctx => `
|
||||
__cdb_bins AS (
|
||||
SELECT ${ctx.override.bins} AS __cdb_bins_number
|
||||
)
|
||||
`;
|
||||
|
||||
const nullsQueryTpl = ctx => `
|
||||
__cdb_nulls AS (
|
||||
SELECT
|
||||
count(*) AS __cdb_nulls_count
|
||||
FROM (${ctx.query}) __cdb_histogram_nulls
|
||||
WHERE ${ctx.column} IS NULL
|
||||
)
|
||||
`;
|
||||
|
||||
const infinitiesQueryTpl = ctx => `
|
||||
__cdb_infinities AS (
|
||||
SELECT
|
||||
count(*) AS __cdb_infinities_count
|
||||
FROM (${ctx.query}) __cdb_infinities_query
|
||||
WHERE
|
||||
${ctx.column} = 'infinity'::float
|
||||
OR
|
||||
${ctx.column} = '-infinity'::float
|
||||
)
|
||||
`;
|
||||
|
||||
const nansQueryTpl = ctx => `
|
||||
__cdb_nans AS (
|
||||
SELECT
|
||||
count(*) AS __cdb_nans_count
|
||||
FROM (${ctx.query}) __cdb_nans_query
|
||||
WHERE ${ctx.column} = 'NaN'::float
|
||||
)
|
||||
`;
|
||||
|
||||
const specialNumericValuesColumnDefinitionTpl = () => `
|
||||
__cdb_infinities_count AS infinities_count,
|
||||
__cdb_nans_count AS nans_count
|
||||
`;
|
||||
|
||||
const specialNumericValuesCTETpl = () => `
|
||||
__cdb_infinities, __cdb_nans
|
||||
`;
|
||||
|
||||
const specialNumericValuesColumnTpl = () => `
|
||||
infinities_count, nans_count
|
||||
`;
|
||||
|
||||
const histogramQueryTpl = ctx => `
|
||||
SELECT
|
||||
(__cdb_max_val - __cdb_min_val) / cast(__cdb_bins_number as float) AS bin_width,
|
||||
__cdb_bins_number AS bins_number,
|
||||
__cdb_nulls_count AS nulls_count,
|
||||
${ctx.isFloatColumn ? `${specialNumericValuesColumnDefinitionTpl()},` : ''}
|
||||
__cdb_avg_val AS avg_val,
|
||||
CASE WHEN __cdb_min_val = __cdb_max_val
|
||||
THEN 0
|
||||
ELSE GREATEST(
|
||||
1,
|
||||
LEAST(
|
||||
WIDTH_BUCKET(${ctx.column}, __cdb_min_val, __cdb_max_val, __cdb_bins_number),
|
||||
__cdb_bins_number
|
||||
)
|
||||
) - 1
|
||||
END AS bin,
|
||||
min(${ctx.column})::numeric AS min,
|
||||
max(${ctx.column})::numeric AS max,
|
||||
avg(${ctx.column})::numeric AS avg,
|
||||
count(*) AS freq
|
||||
FROM __cdb_filtered_source, __cdb_basics, __cdb_nulls, __cdb_bins
|
||||
${ctx.isFloatColumn ? `, ${specialNumericValuesCTETpl()}` : ''}
|
||||
GROUP BY bin, bins_number, bin_width, nulls_count, avg_val
|
||||
${ctx.isFloatColumn ? `, ${specialNumericValuesColumnTpl()}` : ''}
|
||||
ORDER BY bin
|
||||
`;
|
||||
|
||||
const BIN_MIN_NUMBER = 6;
|
||||
const BIN_MAX_NUMBER = 48;
|
||||
|
||||
@ -167,14 +68,14 @@ module.exports = class NumericHistogram extends BaseHistogram {
|
||||
|
||||
_buildQuery (psql, override, callback) {
|
||||
const histogramSql = this._buildQueryTpl({
|
||||
override: override,
|
||||
column: this._columnType === 'date' ? columnCastTpl({ column: this.column }) : this.column,
|
||||
column: this._columnType === 'date' ? utils.columnCastTpl({ column: this.column }) : this.column,
|
||||
isFloatColumn: this._columnType === 'float',
|
||||
query: this.query,
|
||||
start: this._getBinStart(override),
|
||||
end: this._getBinEnd(override),
|
||||
bins: this._getBinsCount(override),
|
||||
minBins: BIN_MIN_NUMBER,
|
||||
maxBins: BIN_MAX_NUMBER,
|
||||
maxBins: BIN_MAX_NUMBER
|
||||
});
|
||||
|
||||
debug(histogramSql);
|
||||
@ -182,19 +83,62 @@ module.exports = class NumericHistogram extends BaseHistogram {
|
||||
return callback(null, histogramSql);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* ctx: Object with the following values
|
||||
* ctx.column -- Column for the histogram
|
||||
* ctx.isFloatColumn - Whether the column is float or not
|
||||
* ctx.query -- Subquery to extract data
|
||||
* ctx.start -- Start value for the bins. [>= end to force calculation]
|
||||
* ctx.end -- End value for the bins.
|
||||
* ctx.bins -- Numbers of bins to generate [<0 to force calculation]
|
||||
* ctx.minBins - If !full min bins to calculate [Optional]
|
||||
* ctx.maxBins - If !full max bins to calculate [Optional]
|
||||
*/
|
||||
_buildQueryTpl (ctx) {
|
||||
var extra_tables = ``;
|
||||
var extra_queries = ``;
|
||||
var extra_groupby = ``;
|
||||
|
||||
if (ctx.start >= ctx.end) {
|
||||
ctx.end = `__cdb_basics.__cdb_max_val`;
|
||||
ctx.start = `__cdb_basics.__cdb_min_val`;
|
||||
extra_groupby = `, __cdb_basics.__cdb_max_val, __cdb_basics.__cdb_min_val`;
|
||||
extra_tables = `, __cdb_basics`;
|
||||
extra_queries = `WITH ${irqQueryTpl(ctx)}`;
|
||||
}
|
||||
|
||||
if (ctx.bins <= 0) {
|
||||
ctx.bins = `__cdb_bins.__cdb_bins_number`;
|
||||
extra_groupby += `, __cdb_bins.__cdb_bins_number`;
|
||||
extra_tables += `, __cdb_bins`;
|
||||
extra_queries = `WITH ${irqQueryTpl(ctx)}, ${binsQueryTpl(ctx)}`;
|
||||
}
|
||||
|
||||
return `
|
||||
WITH
|
||||
${filteredQueryTpl(ctx)},
|
||||
${this._hasOverridenRange(ctx.override) ? overrideBasicsQueryTpl(ctx) : basicsQueryTpl(ctx)},
|
||||
${this._hasOverridenBins(ctx.override) ?
|
||||
overrideBinsQueryTpl(ctx) :
|
||||
`${iqrQueryTpl(ctx)}, ${binsQueryTpl(ctx)}`
|
||||
},
|
||||
${nullsQueryTpl(ctx)}
|
||||
${ctx.isFloatColumn ? `,${infinitiesQueryTpl(ctx)}, ${nansQueryTpl(ctx)}` : ''}
|
||||
${histogramQueryTpl(ctx)}
|
||||
`;
|
||||
${extra_queries}
|
||||
SELECT
|
||||
(${ctx.end} - ${ctx.start}) / ${ctx.bins}::float AS bin_width,
|
||||
${ctx.bins} as bins_number,
|
||||
${utils.countNULLs(ctx)} AS nulls_count,
|
||||
${utils.countInfinites(ctx)} AS infinities_count,
|
||||
${utils.countNaNs(ctx)} AS nans_count,
|
||||
min(${utils.handleFloatColumn(ctx)}) AS min,
|
||||
max(${utils.handleFloatColumn(ctx)}) AS max,
|
||||
avg(${utils.handleFloatColumn(ctx)}) AS avg,
|
||||
sum(CASE WHEN (${utils.handleFloatColumn(ctx)} is not NULL) THEN 1 ELSE 0 END) as freq,
|
||||
CASE WHEN ${ctx.start} = ${ctx.end}
|
||||
THEN 0
|
||||
ELSE GREATEST(1, LEAST(
|
||||
${ctx.bins},
|
||||
WIDTH_BUCKET(${utils.handleFloatColumn(ctx)}, ${ctx.start}, ${ctx.end}, ${ctx.bins}))) - 1
|
||||
END AS bin
|
||||
FROM
|
||||
(
|
||||
${ctx.query}
|
||||
) __cdb_filtered_source_query${extra_tables}
|
||||
GROUP BY bin${extra_groupby}
|
||||
ORDER BY bin;`;
|
||||
}
|
||||
|
||||
_hasOverridenBins (override) {
|
||||
@ -204,14 +148,31 @@ module.exports = class NumericHistogram extends BaseHistogram {
|
||||
_getSummary (result, override) {
|
||||
const firstRow = result.rows[0] || {};
|
||||
|
||||
var total_nulls = 0;
|
||||
var total_infinities = 0;
|
||||
var total_nans = 0;
|
||||
var total_avg = 0;
|
||||
var total_count = 0;
|
||||
|
||||
result.rows.forEach(function(row) {
|
||||
total_nulls += row.nulls_count;
|
||||
total_infinities += row.infinities_count;
|
||||
total_nans += row.nans_count;
|
||||
total_avg += row.avg * row.freq;
|
||||
total_count += row.freq;
|
||||
});
|
||||
if (total_count !== 0) {
|
||||
total_avg /= total_count;
|
||||
}
|
||||
|
||||
return {
|
||||
bin_width: firstRow.bin_width,
|
||||
bins_count: firstRow.bins_number,
|
||||
bins_start: this._populateBinStart(firstRow, override),
|
||||
nulls: firstRow.nulls_count,
|
||||
infinities: firstRow.infinities_count,
|
||||
nans: firstRow.nans_count,
|
||||
avg: firstRow.avg_val,
|
||||
nulls: total_nulls,
|
||||
infinities: total_infinities,
|
||||
nans: total_nans,
|
||||
avg: total_avg
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -1,55 +1,38 @@
|
||||
var BaseOverviewsDataview = require('./base');
|
||||
var BaseDataview = require('../formula');
|
||||
var debug = require('debug')('windshaft:widget:formula:overview');
|
||||
const utils = require('../../../utils/query-utils');
|
||||
|
||||
var dot = require('dot');
|
||||
dot.templateSettings.strip = false;
|
||||
|
||||
var formulaQueryTpls = {
|
||||
'count': dot.template([
|
||||
'SELECT',
|
||||
'sum(_feature_count) AS result,',
|
||||
'(SELECT count(1) FROM ({{=it._query}}) _cdb_formula_nulls WHERE {{=it._column}} IS NULL) AS nulls_count',
|
||||
'{{?it._isFloatColumn}},(SELECT count(1) FROM ({{=it._query}}) _cdb_formula_infinities',
|
||||
' WHERE {{=it._column}} = \'infinity\'::float OR {{=it._column}} = \'-infinity\'::float) AS infinities_count,',
|
||||
'(SELECT count(1) FROM ({{=it._query}}) _cdb_formula_nans',
|
||||
' WHERE {{=it._column}} = \'NaN\'::float) AS nans_count{{?}}',
|
||||
'FROM ({{=it._query}}) _cdb_formula'
|
||||
].join('\n')),
|
||||
'sum': dot.template([
|
||||
'SELECT',
|
||||
'sum({{=it._column}}*_feature_count) AS result,',
|
||||
'(SELECT count(1) FROM ({{=it._query}}) _cdb_formula_nulls WHERE {{=it._column}} IS NULL) AS nulls_count',
|
||||
'{{?it._isFloatColumn}},(SELECT count(1) FROM ({{=it._query}}) _cdb_formula_infinities',
|
||||
' WHERE {{=it._column}} = \'infinity\'::float OR {{=it._column}} = \'-infinity\'::float) AS infinities_count',
|
||||
',(SELECT count(1) FROM ({{=it._query}}) _cdb_formula_nans',
|
||||
' WHERE {{=it._column}} = \'NaN\'::float) AS nans_count{{?}}',
|
||||
'FROM ({{=it._query}}) _cdb_formula',
|
||||
'{{?it._isFloatColumn}}WHERE',
|
||||
' {{=it._column}} != \'infinity\'::float',
|
||||
'AND',
|
||||
' {{=it._column}} != \'-infinity\'::float',
|
||||
'AND',
|
||||
' {{=it._column}} != \'NaN\'::float{{?}}'
|
||||
].join('\n')),
|
||||
'avg': dot.template([
|
||||
'SELECT',
|
||||
'sum({{=it._column}}*_feature_count)/sum(_feature_count) AS result,',
|
||||
'(SELECT count(1) FROM ({{=it._query}}) _cdb_formula_nulls WHERE {{=it._column}} IS NULL) AS nulls_count',
|
||||
'{{?it._isFloatColumn}},(SELECT count(1) FROM ({{=it._query}}) _cdb_formula_infinities',
|
||||
' WHERE {{=it._column}} = \'infinity\'::float OR {{=it._column}} = \'-infinity\'::float) AS infinities_count',
|
||||
',(SELECT count(1) FROM ({{=it._query}}) _cdb_formula_nans',
|
||||
' WHERE {{=it._column}} = \'NaN\'::float) AS nans_count{{?}}',
|
||||
'FROM ({{=it._query}}) _cdb_formula',
|
||||
'{{?it._isFloatColumn}}WHERE',
|
||||
' {{=it._column}} != \'infinity\'::float',
|
||||
'AND',
|
||||
' {{=it._column}} != \'-infinity\'::float',
|
||||
'AND',
|
||||
' {{=it._column}} != \'NaN\'::float{{?}}'
|
||||
].join('\n')),
|
||||
const VALID_OPERATIONS = {
|
||||
count: true,
|
||||
sum: true,
|
||||
avg: true
|
||||
};
|
||||
|
||||
/** Formulae to calculate the end result using _feature_count from the overview table*/
|
||||
function dataviewResult(ctx) {
|
||||
switch (ctx.operation) {
|
||||
case 'count':
|
||||
return `sum(_feature_count)`;
|
||||
case 'sum':
|
||||
return `sum(${utils.handleFloatColumn(ctx)}*_feature_count)`;
|
||||
case 'avg':
|
||||
return `sum(${utils.handleFloatColumn(ctx)}*_feature_count)/sum(_feature_count) `;
|
||||
}
|
||||
return `${ctx.operation}(${utils.handleFloatColumn(ctx)})`;
|
||||
}
|
||||
|
||||
const formulaQueryTpl = ctx =>
|
||||
`SELECT
|
||||
${dataviewResult(ctx)} AS result,
|
||||
${utils.countNULLs(ctx)} AS nulls_count
|
||||
${ctx.isFloatColumn ? `,${utils.countInfinites(ctx)} AS infinities_count,` : ``}
|
||||
${ctx.isFloatColumn ? `${utils.countNaNs(ctx)} AS nans_count` : ``}
|
||||
FROM (${ctx.query}) __cdb_formula`;
|
||||
|
||||
function Formula(query, options, queryRewriter, queryRewriteData, params, queries) {
|
||||
BaseOverviewsDataview.call(this, query, options, BaseDataview, queryRewriter, queryRewriteData, params, queries);
|
||||
this.column = options.column || '1';
|
||||
@ -65,36 +48,31 @@ module.exports = Formula;
|
||||
|
||||
Formula.prototype.sql = function (psql, override, callback) {
|
||||
var self = this;
|
||||
var formulaQueryTpl = formulaQueryTpls[this.operation];
|
||||
|
||||
if (formulaQueryTpl) {
|
||||
// supported formula for use with overviews
|
||||
if (this._isFloatColumn === null) {
|
||||
this._isFloatColumn = false;
|
||||
this.getColumnType(psql, this.column, this.queries.no_filters, function (err, type) {
|
||||
if (!err && !!type) {
|
||||
self._isFloatColumn = type.float;
|
||||
}
|
||||
self.sql(psql, override, callback);
|
||||
});
|
||||
return null;
|
||||
}
|
||||
|
||||
var formulaSql = formulaQueryTpl({
|
||||
_isFloatColumn: this._isFloatColumn,
|
||||
_query: this.rewrittenQuery(this.query),
|
||||
_operation: this.operation,
|
||||
_column: this.column
|
||||
});
|
||||
|
||||
callback = callback || override;
|
||||
|
||||
debug(formulaSql);
|
||||
|
||||
return callback(null, formulaSql);
|
||||
if (!VALID_OPERATIONS[this.operation]) {
|
||||
return this.defaultSql(psql, override, callback);
|
||||
}
|
||||
|
||||
if (this._isFloatColumn === null) {
|
||||
this._isFloatColumn = false;
|
||||
this.getColumnType(psql, this.column, this.queries.no_filters, function (err, type) {
|
||||
if (!err && !!type) {
|
||||
self._isFloatColumn = type.float;
|
||||
}
|
||||
self.sql(psql, override, callback);
|
||||
});
|
||||
return null;
|
||||
}
|
||||
|
||||
// default behaviour
|
||||
return this.defaultSql(psql, override, callback);
|
||||
var formulaSql = formulaQueryTpl({
|
||||
isFloatColumn: this._isFloatColumn,
|
||||
query: this.rewrittenQuery(this.query),
|
||||
operation: this.operation,
|
||||
column: this.column
|
||||
});
|
||||
|
||||
callback = callback || override;
|
||||
|
||||
debug(formulaSql);
|
||||
|
||||
return callback(null, formulaSql);
|
||||
};
|
||||
|
@ -40,3 +40,34 @@ module.exports.getAggregationMetadata = ctx => `
|
||||
geom_type AS type
|
||||
FROM rowEstimation, geometryType;
|
||||
`;
|
||||
|
||||
/** Cast the column to epoch */
|
||||
module.exports.columnCastTpl = function columnCastTpl(ctx) {
|
||||
return `date_part('epoch', ${ctx.column})`;
|
||||
};
|
||||
|
||||
/** If the column type is float, ignore any non numeric result (infinity / NaN) */
|
||||
module.exports.handleFloatColumn = function handleFloatColumn(ctx) {
|
||||
return `${!ctx.isFloatColumn ? `${ctx.column}` :
|
||||
`nullif(nullif(nullif(${ctx.column}, 'infinity'::float), '-infinity'::float), 'NaN'::float)`
|
||||
}`;
|
||||
};
|
||||
|
||||
/** Count NULL appearances */
|
||||
module.exports.countNULLs= function countNULLs(ctx) {
|
||||
return `sum(CASE WHEN (${ctx.column} IS NULL) THEN 1 ELSE 0 END)`;
|
||||
};
|
||||
|
||||
/** Count only infinity (positive and negative) appearances */
|
||||
module.exports.countInfinites = function countInfinites(ctx) {
|
||||
return `${!ctx.isFloatColumn ? `0` :
|
||||
`sum(CASE WHEN (${ctx.column} = 'infinity'::float OR ${ctx.column} = '-infinity'::float) THEN 1 ELSE 0 END)`
|
||||
}`;
|
||||
};
|
||||
|
||||
/** Count only NaNs appearances*/
|
||||
module.exports.countNaNs = function countNaNs(ctx) {
|
||||
return `${!ctx.isFloatColumn ? `0` :
|
||||
`sum(CASE WHEN (${ctx.column} = 'NaN'::float) THEN 1 ELSE 0 END)`
|
||||
}`;
|
||||
};
|
||||
|
@ -1,7 +1,7 @@
|
||||
{
|
||||
"private": true,
|
||||
"name": "windshaft-cartodb",
|
||||
"version": "4.2.1",
|
||||
"version": "4.3.1",
|
||||
"description": "A map tile server for CartoDB",
|
||||
"keywords": [
|
||||
"cartodb"
|
||||
|
@ -186,7 +186,7 @@ describe('histogram-dataview for date column type', function() {
|
||||
},
|
||||
minute_histogram: {
|
||||
source: {
|
||||
id: 'minute-histogram-source'
|
||||
id: 'minute-histogram-source-tz'
|
||||
},
|
||||
type: 'histogram',
|
||||
options: {
|
||||
@ -214,8 +214,8 @@ describe('histogram-dataview for date column type', function() {
|
||||
"params": {
|
||||
"query": [
|
||||
"select null::geometry the_geom_webmercator, date AS d",
|
||||
"from generate_series(",
|
||||
"'2007-02-15 01:00:00'::timestamptz, '2008-04-09 01:00:00'::timestamptz, '1 day'::interval",
|
||||
"from generate_series('2007-02-15 01:00:00+00'::timestamptz,",
|
||||
"'2008-04-09 01:00:00+00'::timestamptz, '1 day'::interval",
|
||||
") date"
|
||||
].join(' ')
|
||||
}
|
||||
@ -233,13 +233,13 @@ describe('histogram-dataview for date column type', function() {
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "minute-histogram-source",
|
||||
"id": "minute-histogram-source-tz",
|
||||
"type": "source",
|
||||
"params": {
|
||||
"query": [
|
||||
"select null::geometry the_geom_webmercator, date AS d",
|
||||
"from generate_series(",
|
||||
"'2007-02-15 23:50:00'::timestamp, '2007-02-16 00:10:00'::timestamp, '1 minute'::interval",
|
||||
"from generate_series('2007-02-15 23:50:00+00'::timestamptz,",
|
||||
"'2007-02-16 00:10:00+00'::timestamptz, '1 minute'::interval",
|
||||
") date"
|
||||
].join(' ')
|
||||
}
|
||||
@ -256,6 +256,7 @@ describe('histogram-dataview for date column type', function() {
|
||||
}];
|
||||
|
||||
dateHistogramsUseCases.forEach(function (test) {
|
||||
|
||||
it('should create a date histogram aggregated in months (EDT) ' + test.desc, function (done) {
|
||||
var OFFSET_EDT_IN_MINUTES = -4 * 60; // EDT Eastern Daylight Time (GMT-4) in minutes
|
||||
|
||||
@ -323,7 +324,7 @@ describe('histogram-dataview for date column type', function() {
|
||||
assert.ok(!err, err);
|
||||
assert.equal(dataview.type, 'histogram');
|
||||
assert.ok(dataview.bin_width > 0, 'Unexpected bin width: ' + dataview.bin_width);
|
||||
assert.equal(dataview.bins.length, 6);
|
||||
assert.equal(dataview.bins_count, 6);
|
||||
dataview.bins.forEach(function (bin) {
|
||||
assert.ok(bin.min <= bin.max, 'bin min < bin max: ' + JSON.stringify(bin));
|
||||
});
|
||||
@ -335,7 +336,7 @@ describe('histogram-dataview for date column type', function() {
|
||||
it('should cast overridden start and end to float to avoid out of range errors ' + test.desc, function (done) {
|
||||
var params = {
|
||||
start: -2145916800,
|
||||
end: 1009843199
|
||||
end: 1193792400
|
||||
};
|
||||
|
||||
this.testClient = new TestClient(mapConfig, 1234);
|
||||
@ -348,27 +349,6 @@ describe('histogram-dataview for date column type', function() {
|
||||
});
|
||||
});
|
||||
|
||||
it('should return same histogram ' + test.desc, function (done) {
|
||||
var params = {
|
||||
start: 1171501200, // 2007-02-15 01:00:00 = min(date_colum)
|
||||
end: 1207702800 // 2008-04-09 01:00:00 = max(date_colum)
|
||||
};
|
||||
|
||||
this.testClient = new TestClient(mapConfig, 1234);
|
||||
this.testClient.getDataview(test.dataviewId, {}, function (err, dataview) {
|
||||
assert.ok(!err, err);
|
||||
|
||||
this.testClient = new TestClient(mapConfig, 1234);
|
||||
this.testClient.getDataview(test.dataviewId, params, function (err, filteredDataview) {
|
||||
assert.ok(!err, err);
|
||||
|
||||
assert.deepEqual(dataview, filteredDataview);
|
||||
done();
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
it('should aggregate histogram overriding default offset to CEST ' + test.desc, function (done) {
|
||||
var OFFSET_CEST_IN_SECONDS = 2 * 3600; // Central European Summer Time (Daylight Saving Time)
|
||||
var OFFSET_CEST_IN_MINUTES = 2 * 60; // Central European Summer Time (Daylight Saving Time)
|
||||
@ -533,6 +513,26 @@ describe('histogram-dataview for date column type', function() {
|
||||
});
|
||||
});
|
||||
|
||||
it('should return same histogram ', function (done) {
|
||||
var params = {
|
||||
start: 1171501200, // 2007-02-15 01:00:00 = min(date_colum)
|
||||
end: 1207702800 // 2008-04-09 01:00:00 = max(date_colum)
|
||||
};
|
||||
|
||||
this.testClient = new TestClient(mapConfig, 1234);
|
||||
this.testClient.getDataview('datetime_histogram_tz', {}, function (err, dataview) {
|
||||
assert.ok(!err, err);
|
||||
|
||||
this.testClient = new TestClient(mapConfig, 1234);
|
||||
this.testClient.getDataview('datetime_histogram_tz', params, function (err, filteredDataview) {
|
||||
assert.ok(!err, err);
|
||||
|
||||
assert.deepEqual(dataview, filteredDataview);
|
||||
done();
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
it('should find the best aggregation (automatic mode) to build the histogram', function (done) {
|
||||
var params = {};
|
||||
this.testClient = new TestClient(mapConfig, 1234);
|
||||
@ -640,7 +640,7 @@ describe('histogram-dataview for date column type', function() {
|
||||
|
||||
var dataviewWithDailyAggFixture = {
|
||||
aggregation: 'day',
|
||||
bin_width: 600,
|
||||
bin_width: 86400,
|
||||
bins_count: 2,
|
||||
bins_start: 1171497600,
|
||||
timestamp_start: 1171497600,
|
||||
@ -650,17 +650,17 @@ describe('histogram-dataview for date column type', function() {
|
||||
[{
|
||||
bin: 0,
|
||||
timestamp: 1171497600,
|
||||
min: 1171583400,
|
||||
max: 1171583940,
|
||||
avg: 1171583670,
|
||||
min: 1171497600,
|
||||
max: 1171497600,
|
||||
avg: 1171497600,
|
||||
freq: 10
|
||||
},
|
||||
{
|
||||
bin: 1,
|
||||
timestamp: 1171584000,
|
||||
min: 1171584000,
|
||||
max: 1171584600,
|
||||
avg: 1171584300,
|
||||
max: 1171584000,
|
||||
avg: 1171584000,
|
||||
freq: 11
|
||||
}],
|
||||
type: 'histogram'
|
||||
@ -687,19 +687,19 @@ describe('histogram-dataview for date column type', function() {
|
||||
|
||||
var dataviewWithDailyAggAndOffsetFixture = {
|
||||
aggregation: 'day',
|
||||
bin_width: 1200,
|
||||
bin_width: 86400,
|
||||
bins_count: 1,
|
||||
bins_start: 1171501200,
|
||||
timestamp_start: 1171497600,
|
||||
timestamp_start: 1171501200,
|
||||
nulls: 0,
|
||||
offset: -3600,
|
||||
bins:
|
||||
[{
|
||||
bin: 0,
|
||||
timestamp: 1171501200,
|
||||
min: 1171583400,
|
||||
max: 1171584600,
|
||||
avg: 1171584000,
|
||||
min: 1171501200,
|
||||
max: 1171501200,
|
||||
avg: 1171501200,
|
||||
freq: 21
|
||||
}],
|
||||
type: 'histogram'
|
||||
|
Loading…
Reference in New Issue
Block a user