Histogram: Speed up IQR calculation

This commit is contained in:
Raul Marin 2019-03-04 14:09:30 +01:00 committed by Raul Marin
parent e65872d5df
commit 6241b23d4f

View File

@ -15,24 +15,15 @@ const irqQueryTpl = ctx => `
SELECT
max(${ctx.column}) AS __cdb_max_val,
min(${ctx.column}) AS __cdb_min_val,
count(1) AS __cdb_total_rows
count(1) AS __cdb_total_rows,
${ctx.irq ? ctx.irq : `0`} AS __cdb_iqr
FROM __cdb_filtered_source
)
`;
/* Query to calculate the number of bins (needs irqQueryTpl before it*/
/* Query to calculate the number of bins (needs irqQueryTpl before it).
* It uses the Freedman-Diaconis rule to calculate the width of the bins */
const binsQueryTpl = ctx => `
__cdb_iqrange AS (
SELECT max(quartile_max) - min(quartile_max) AS __cdb_iqr
FROM (
SELECT quartile, max(_cdb_iqr_column) AS quartile_max from (
SELECT ${ctx.column} AS _cdb_iqr_column, ntile(4) over (order by ${ctx.column}
) AS quartile
FROM __cdb_filtered_source) _cdb_quartiles
WHERE quartile = 1 or quartile = 3
GROUP BY 1
) __cdb_iqr
),
__cdb_bins AS (
SELECT
CASE WHEN __cdb_total_rows = 0 OR __cdb_iqr = 0
@ -45,7 +36,7 @@ const binsQueryTpl = ctx => `
)
)
END AS __cdb_bins_number
FROM __cdb_basics, __cdb_iqrange, __cdb_filtered_source
FROM __cdb_basics, __cdb_filtered_source
LIMIT 1
)
`;
@ -118,6 +109,7 @@ module.exports = class NumericHistogram extends BaseHistogram {
if (ctx.bins <= 0) {
ctx.bins = `__cdb_bins.__cdb_bins_number`;
ctx.irq = `percentile_disc(0.75) within group (order by ${ctx.column}) - percentile_disc(0.25) within group (order by ${ctx.column})`;
extra_groupby += `, __cdb_bins.__cdb_bins_number`;
extra_tables += `, __cdb_bins`;
extra_queries = `WITH ${irqQueryTpl(ctx)}, ${binsQueryTpl(ctx)}`;