Numeric histogram: Simplify bin calculation
This commit is contained in:
parent
6241b23d4f
commit
730076469e
@ -4,41 +4,37 @@ const BaseHistogram = require('./base-histogram');
|
||||
const debug = require('debug')('windshaft:dataview:numeric-histogram');
|
||||
const utils = require('../../../utils/query-utils');
|
||||
|
||||
/** Query to get min and max values from the query */
|
||||
/** Query to get min, max, count and (if necessary) bin number of the query */
|
||||
const irqQueryTpl = ctx => `
|
||||
__cdb_filtered_source AS (
|
||||
SELECT *
|
||||
FROM (${ctx.query}) __cdb_filtered_source_query
|
||||
WHERE ${utils.handleFloatColumn(ctx)} IS NOT NULL
|
||||
),
|
||||
__cdb_basics AS (
|
||||
SELECT
|
||||
*,
|
||||
CASE
|
||||
WHEN __cdb_total_rows = 0 OR __cdb_iqr = 0 THEN 1
|
||||
ELSE GREATEST(
|
||||
LEAST(
|
||||
${ctx.minBins},
|
||||
__cdb_total_rows::int),
|
||||
LEAST(
|
||||
${ctx.maxBins},
|
||||
((__cdb_max_val - __cdb_min_val) / (2 * __cdb_iqr * power(__cdb_total_rows, 1/3)))::int)
|
||||
)
|
||||
END AS __cdb_bins_number
|
||||
FROM
|
||||
(
|
||||
SELECT
|
||||
max(${ctx.column}) AS __cdb_max_val,
|
||||
min(${ctx.column}) AS __cdb_min_val,
|
||||
count(1) AS __cdb_total_rows,
|
||||
${ctx.irq ? ctx.irq : `0`} AS __cdb_iqr
|
||||
FROM __cdb_filtered_source
|
||||
)
|
||||
`;
|
||||
|
||||
/* Query to calculate the number of bins (needs irqQueryTpl before it).
|
||||
* It uses the Freedman–Diaconis rule to calculate the width of the bins */
|
||||
const binsQueryTpl = ctx => `
|
||||
__cdb_bins AS (
|
||||
SELECT
|
||||
CASE WHEN __cdb_total_rows = 0 OR __cdb_iqr = 0
|
||||
THEN 1
|
||||
ELSE GREATEST(
|
||||
LEAST(${ctx.minBins}, CAST(__cdb_total_rows AS INT)),
|
||||
LEAST(
|
||||
CAST(((__cdb_max_val - __cdb_min_val) / (2 * __cdb_iqr * power(__cdb_total_rows, 1/3))) AS INT),
|
||||
${ctx.maxBins}
|
||||
)
|
||||
)
|
||||
END AS __cdb_bins_number
|
||||
FROM __cdb_basics, __cdb_filtered_source
|
||||
LIMIT 1
|
||||
)
|
||||
FROM
|
||||
(
|
||||
SELECT *
|
||||
FROM (${ctx.query}) __cdb_filtered_source_query
|
||||
WHERE ${utils.handleFloatColumn(ctx)} IS NOT NULL
|
||||
) __cdb_filtered_source
|
||||
) __cdb_basics_2
|
||||
)
|
||||
`;
|
||||
|
||||
const BIN_MIN_NUMBER = 6;
|
||||
@ -108,11 +104,11 @@ module.exports = class NumericHistogram extends BaseHistogram {
|
||||
}
|
||||
|
||||
if (ctx.bins <= 0) {
|
||||
ctx.bins = `__cdb_bins.__cdb_bins_number`;
|
||||
ctx.irq = `percentile_disc(0.75) within group (order by ${ctx.column}) - percentile_disc(0.25) within group (order by ${ctx.column})`;
|
||||
extra_groupby += `, __cdb_bins.__cdb_bins_number`;
|
||||
extra_tables += `, __cdb_bins`;
|
||||
extra_queries = `WITH ${irqQueryTpl(ctx)}, ${binsQueryTpl(ctx)}`;
|
||||
ctx.bins = `__cdb_basics.__cdb_bins_number`;
|
||||
ctx.irq = `percentile_disc(0.75) within group (order by ${ctx.column})
|
||||
- percentile_disc(0.25) within group (order by ${ctx.column})`;
|
||||
extra_groupby += `, __cdb_basics.__cdb_bins_number`;
|
||||
extra_queries = `WITH ${irqQueryTpl(ctx)}`;
|
||||
}
|
||||
|
||||
return `
|
||||
|
Loading…
Reference in New Issue
Block a user