Handle Infinity and NaN values for histograms
This commit is contained in:
parent
6c301403e3
commit
cd53eda0a5
@ -13,12 +13,27 @@ var columnCastTpl = dot.template("date_part('epoch', {{=it.column}})");
|
||||
var BIN_MIN_NUMBER = 6;
|
||||
var BIN_MAX_NUMBER = 48;
|
||||
|
||||
var filteredQueryTpl = dot.template([
|
||||
'filtered_source AS (',
|
||||
' SELECT *',
|
||||
' FROM ({{=it._query}}) _cdb_filtered_source',
|
||||
' WHERE',
|
||||
' {{=it._column}} IS NOT NULL',
|
||||
' AND',
|
||||
' {{=it._column}} != \'infinity\'::float',
|
||||
' AND',
|
||||
' {{=it._column}} != \'-infinity\'::float',
|
||||
' AND',
|
||||
' {{=it._column}} != \'NaN\'::float',
|
||||
')'
|
||||
].join(' \n'));
|
||||
|
||||
var basicsQueryTpl = dot.template([
|
||||
'basics AS (',
|
||||
' SELECT',
|
||||
' max({{=it._column}}) AS max_val, min({{=it._column}}) AS min_val,',
|
||||
' avg({{=it._column}}) AS avg_val, count(1) AS total_rows',
|
||||
' FROM ({{=it._query}}) _cdb_basics',
|
||||
' FROM filtered_source',
|
||||
')'
|
||||
].join(' \n'));
|
||||
|
||||
@ -27,7 +42,7 @@ var overrideBasicsQueryTpl = dot.template([
|
||||
' SELECT',
|
||||
' max({{=it._end}}) AS max_val, min({{=it._start}}) AS min_val,',
|
||||
' avg({{=it._column}}) AS avg_val, count(1) AS total_rows',
|
||||
' FROM ({{=it._query}}) _cdb_basics',
|
||||
' FROM filtered_source',
|
||||
')'
|
||||
].join('\n'));
|
||||
|
||||
@ -38,7 +53,7 @@ var iqrQueryTpl = dot.template([
|
||||
' SELECT quartile, max(_cdb_iqr_column) AS quartile_max from (',
|
||||
' SELECT {{=it._column}} AS _cdb_iqr_column, ntile(4) over (order by {{=it._column}}',
|
||||
' ) AS quartile',
|
||||
' FROM ({{=it._query}}) _cdb_rank) _cdb_quartiles',
|
||||
' FROM filtered_source) _cdb_quartiles',
|
||||
' WHERE quartile = 1 or quartile = 3',
|
||||
' GROUP BY quartile',
|
||||
' ) _cdb_iqr',
|
||||
@ -57,7 +72,7 @@ var binsQueryTpl = dot.template([
|
||||
' )',
|
||||
' )',
|
||||
' END AS bins_number',
|
||||
' FROM basics, iqrange, ({{=it._query}}) _cdb_bins',
|
||||
' FROM basics, iqrange, filtered_source',
|
||||
' LIMIT 1',
|
||||
')'
|
||||
].join('\n'));
|
||||
@ -77,11 +92,34 @@ var nullsQueryTpl = dot.template([
|
||||
')'
|
||||
].join('\n'));
|
||||
|
||||
var infinitiesQueryTpl = dot.template([
|
||||
'infinities AS (',
|
||||
' SELECT',
|
||||
' count(*) AS infinities_count',
|
||||
' FROM ({{=it._query}}) _cdb_histogram_infinities',
|
||||
' WHERE',
|
||||
' {{=it._column}} = \'infinity\'::float',
|
||||
' OR',
|
||||
' {{=it._column}} = \'-infinity\'::float',
|
||||
')'
|
||||
].join('\n'));
|
||||
|
||||
var nansQueryTpl = dot.template([
|
||||
'nans AS (',
|
||||
' SELECT',
|
||||
' count(*) AS nans_count',
|
||||
' FROM ({{=it._query}}) _cdb_histogram_infinities',
|
||||
' WHERE {{=it._column}} = \'NaN\'::float',
|
||||
')'
|
||||
].join('\n'));
|
||||
|
||||
var histogramQueryTpl = dot.template([
|
||||
'SELECT',
|
||||
' (max_val - min_val) / cast(bins_number as float) AS bin_width,',
|
||||
' bins_number,',
|
||||
' nulls_count,',
|
||||
' infinities_count,',
|
||||
' nans_count,',
|
||||
' avg_val,',
|
||||
' CASE WHEN min_val = max_val',
|
||||
' THEN 0',
|
||||
@ -91,9 +129,8 @@ var histogramQueryTpl = dot.template([
|
||||
' max({{=it._column}})::numeric AS max,',
|
||||
' avg({{=it._column}})::numeric AS avg,',
|
||||
' count(*) AS freq',
|
||||
'FROM ({{=it._query}}) _cdb_histogram, basics, nulls, bins',
|
||||
'WHERE {{=it._column}} IS NOT NULL',
|
||||
'GROUP BY bin, bins_number, bin_width, nulls_count, avg_val',
|
||||
'FROM filtered_source, basics, nulls, infinities, nans, bins',
|
||||
'GROUP BY bin, bins_number, bin_width, nulls_count, infinities_count, nans_count, avg_val',
|
||||
'ORDER BY bin'
|
||||
].join('\n'));
|
||||
|
||||
@ -168,7 +205,12 @@ Histogram.prototype.sql = function(psql, override, callback) {
|
||||
|
||||
var _query = this.query;
|
||||
|
||||
var basicsQuery, binsQuery;
|
||||
var filteredQuery, basicsQuery, binsQuery;
|
||||
|
||||
filteredQuery = filteredQueryTpl({
|
||||
_query: _query,
|
||||
_column: _column
|
||||
});
|
||||
|
||||
if (override && _.has(override, 'start') && _.has(override, 'end') && _.has(override, 'bins')) {
|
||||
debug('overriding with %j', override);
|
||||
@ -215,11 +257,20 @@ Histogram.prototype.sql = function(psql, override, callback) {
|
||||
var histogramSql = [
|
||||
"WITH",
|
||||
[
|
||||
filteredQuery,
|
||||
basicsQuery,
|
||||
binsQuery,
|
||||
nullsQueryTpl({
|
||||
_query: _query,
|
||||
_column: _column
|
||||
}),
|
||||
infinitiesQueryTpl({
|
||||
_query: _query,
|
||||
_column: _column
|
||||
}),
|
||||
nansQueryTpl({
|
||||
_query: _query,
|
||||
_column: _column
|
||||
})
|
||||
].join(',\n'),
|
||||
histogramQueryTpl({
|
||||
@ -241,6 +292,8 @@ Histogram.prototype.format = function(result, override) {
|
||||
var width = getWidth(override);
|
||||
var binsStart = getBinStart(override);
|
||||
var nulls = 0;
|
||||
var infinities = 0;
|
||||
var nans = 0;
|
||||
var avg;
|
||||
|
||||
if (result.rows.length) {
|
||||
@ -249,10 +302,12 @@ Histogram.prototype.format = function(result, override) {
|
||||
width = firstRow.bin_width || width;
|
||||
avg = firstRow.avg_val;
|
||||
nulls = firstRow.nulls_count;
|
||||
infinities = firstRow.infinities_count;
|
||||
nans = firstRow.nans_count;
|
||||
binsStart = override.hasOwnProperty('start') ? getBinStart(override) : firstRow.min;
|
||||
|
||||
buckets = result.rows.map(function(row) {
|
||||
return _.omit(row, 'bins_number', 'bin_width', 'nulls_count', 'avg_val');
|
||||
return _.omit(row, 'bins_number', 'bin_width', 'nulls_count', 'infinities_count', 'nans_count', 'avg_val');
|
||||
});
|
||||
}
|
||||
|
||||
@ -261,6 +316,8 @@ Histogram.prototype.format = function(result, override) {
|
||||
bins_count: binsCount,
|
||||
bins_start: binsStart,
|
||||
nulls: nulls,
|
||||
infinities: infinities,
|
||||
nans: nans,
|
||||
avg: avg,
|
||||
bins: buckets
|
||||
};
|
||||
|
Loading…
Reference in New Issue
Block a user