Handle special float values only if column is float
This commit is contained in:
parent
20d7f1a7c5
commit
b1ac5b8ca9
@ -33,6 +33,12 @@ var FLOAT_OIDS = {
|
||||
701: true
|
||||
};
|
||||
|
||||
var DATE_OIDS = {
|
||||
1082: true,
|
||||
1114: true,
|
||||
1184: true
|
||||
};
|
||||
|
||||
var columnTypeQueryTpl = dot.template(
|
||||
'SELECT pg_typeof({{=it.column}})::oid FROM ({{=it.query}}) _cdb_column_type limit 1'
|
||||
);
|
||||
@ -55,6 +61,7 @@ BaseDataview.prototype.getColumnType = function (psql, column, query, callback)
|
||||
|
||||
function getPGTypeName (pgType) {
|
||||
return {
|
||||
float: FLOAT_OIDS.hasOwnProperty(pgType)
|
||||
float: FLOAT_OIDS.hasOwnProperty(pgType),
|
||||
date: DATE_OIDS.hasOwnProperty(pgType)
|
||||
};
|
||||
}
|
||||
|
@ -5,9 +5,6 @@ var debug = require('debug')('windshaft:dataview:histogram');
|
||||
var dot = require('dot');
|
||||
dot.templateSettings.strip = false;
|
||||
|
||||
var columnTypeQueryTpl = dot.template(
|
||||
'SELECT pg_typeof({{=it.column}})::oid FROM ({{=it.query}}) _cdb_histogram_column_type limit 1'
|
||||
);
|
||||
var columnCastTpl = dot.template("date_part('epoch', {{=it.column}})");
|
||||
|
||||
var BIN_MIN_NUMBER = 6;
|
||||
@ -19,12 +16,12 @@ var filteredQueryTpl = dot.template([
|
||||
' FROM ({{=it._query}}) _cdb_filtered_source',
|
||||
' WHERE',
|
||||
' {{=it._column}} IS NOT NULL',
|
||||
' AND',
|
||||
' {{?it._isFloatColumn}}AND',
|
||||
' {{=it._column}} != \'infinity\'::float',
|
||||
' AND',
|
||||
' {{=it._column}} != \'-infinity\'::float',
|
||||
' AND',
|
||||
' {{=it._column}} != \'NaN\'::float',
|
||||
' {{=it._column}} != \'NaN\'::float{{?}}',
|
||||
')'
|
||||
].join(' \n'));
|
||||
|
||||
@ -118,8 +115,8 @@ var histogramQueryTpl = dot.template([
|
||||
' (max_val - min_val) / cast(bins_number as float) AS bin_width,',
|
||||
' bins_number,',
|
||||
' nulls_count,',
|
||||
' infinities_count,',
|
||||
' nans_count,',
|
||||
' {{?it._isFloatColumn}}infinities_count,',
|
||||
' nans_count,{{?}}',
|
||||
' avg_val,',
|
||||
' CASE WHEN min_val = max_val',
|
||||
' THEN 0',
|
||||
@ -129,8 +126,9 @@ var histogramQueryTpl = dot.template([
|
||||
' max({{=it._column}})::numeric AS max,',
|
||||
' avg({{=it._column}})::numeric AS avg,',
|
||||
' count(*) AS freq',
|
||||
'FROM filtered_source, basics, nulls, infinities, nans, bins',
|
||||
'GROUP BY bin, bins_number, bin_width, nulls_count, infinities_count, nans_count, avg_val',
|
||||
'FROM filtered_source, basics, nulls, bins{{?it._isFloatColumn}}, infinities, nans{{?}}',
|
||||
'GROUP BY bin, bins_number, bin_width, nulls_count,',
|
||||
' avg_val{{?it._isFloatColumn}}, infinities_count, nans_count{{?}}',
|
||||
'ORDER BY bin'
|
||||
].join('\n'));
|
||||
|
||||
@ -164,55 +162,49 @@ Histogram.prototype.constructor = Histogram;
|
||||
|
||||
module.exports = Histogram;
|
||||
|
||||
var DATE_OIDS = {
|
||||
1082: true,
|
||||
1114: true,
|
||||
1184: true
|
||||
};
|
||||
|
||||
Histogram.prototype.sql = function(psql, override, callback) {
|
||||
var self = this;
|
||||
|
||||
if (!callback) {
|
||||
callback = override;
|
||||
override = {};
|
||||
}
|
||||
|
||||
var self = this;
|
||||
|
||||
var _column = this.column;
|
||||
|
||||
var columnTypeQuery = columnTypeQueryTpl({
|
||||
column: _column, query: this.queries.no_filters
|
||||
});
|
||||
|
||||
if (this._columnType === null) {
|
||||
psql.query(columnTypeQuery, function(err, result) {
|
||||
this.getColumnType(psql, this.column, this.queries.no_filters, function (err, type) {
|
||||
// assume numeric, will fail later
|
||||
self._columnType = 'numeric';
|
||||
if (!err && !!result.rows[0]) {
|
||||
var pgType = result.rows[0].pg_typeof;
|
||||
if (DATE_OIDS.hasOwnProperty(pgType)) {
|
||||
self._columnType = 'date';
|
||||
}
|
||||
if (!err && !!type) {
|
||||
self._columnType = Object.keys(type).find(function (key) {
|
||||
return type[key];
|
||||
});
|
||||
}
|
||||
self.sql(psql, override, callback);
|
||||
}, true); // use read-only transaction
|
||||
return null;
|
||||
}
|
||||
|
||||
var histogramSql = this._buildQuery(override);
|
||||
|
||||
return callback(null, histogramSql);
|
||||
};
|
||||
|
||||
Histogram.prototype._buildQuery = function (override) {
|
||||
var filteredQuery, basicsQuery, binsQuery;
|
||||
var _column = this.column;
|
||||
var _query = this.query;
|
||||
|
||||
if (this._columnType === 'date') {
|
||||
_column = columnCastTpl({column: _column});
|
||||
}
|
||||
|
||||
var _query = this.query;
|
||||
|
||||
var filteredQuery, basicsQuery, binsQuery;
|
||||
|
||||
filteredQuery = filteredQueryTpl({
|
||||
_isFloatColumn: this._columnType === 'float',
|
||||
_query: _query,
|
||||
_column: _column
|
||||
});
|
||||
|
||||
if (override && _.has(override, 'start') && _.has(override, 'end') && _.has(override, 'bins')) {
|
||||
if (this._shouldOverride(override)) {
|
||||
debug('overriding with %j', override);
|
||||
basicsQuery = overrideBasicsQueryTpl({
|
||||
_query: _query,
|
||||
@ -232,7 +224,7 @@ Histogram.prototype.sql = function(psql, override, callback) {
|
||||
_column: _column
|
||||
});
|
||||
|
||||
if (override && _.has(override, 'bins')) {
|
||||
if (this._shouldOverrideBins(override)) {
|
||||
binsQuery = [
|
||||
overrideBinsQueryTpl({
|
||||
_bins: override.bins
|
||||
@ -253,17 +245,18 @@ Histogram.prototype.sql = function(psql, override, callback) {
|
||||
}
|
||||
}
|
||||
|
||||
var cteSql = [
|
||||
filteredQuery,
|
||||
basicsQuery,
|
||||
binsQuery,
|
||||
nullsQueryTpl({
|
||||
_query: _query,
|
||||
_column: _column
|
||||
})
|
||||
];
|
||||
|
||||
var histogramSql = [
|
||||
"WITH",
|
||||
[
|
||||
filteredQuery,
|
||||
basicsQuery,
|
||||
binsQuery,
|
||||
nullsQueryTpl({
|
||||
_query: _query,
|
||||
_column: _column
|
||||
}),
|
||||
if (this._columnType === 'float') {
|
||||
cteSql.push(
|
||||
infinitiesQueryTpl({
|
||||
_query: _query,
|
||||
_column: _column
|
||||
@ -272,8 +265,14 @@ Histogram.prototype.sql = function(psql, override, callback) {
|
||||
_query: _query,
|
||||
_column: _column
|
||||
})
|
||||
].join(',\n'),
|
||||
);
|
||||
}
|
||||
|
||||
var histogramSql = [
|
||||
"WITH",
|
||||
cteSql.join(',\n'),
|
||||
histogramQueryTpl({
|
||||
_isFloatColumn: this._columnType === 'float',
|
||||
_query: _query,
|
||||
_column: _column
|
||||
})
|
||||
@ -281,7 +280,15 @@ Histogram.prototype.sql = function(psql, override, callback) {
|
||||
|
||||
debug(histogramSql);
|
||||
|
||||
return callback(null, histogramSql);
|
||||
return histogramSql;
|
||||
};
|
||||
|
||||
Histogram.prototype._shouldOverride = function (override) {
|
||||
return override && _.has(override, 'start') && _.has(override, 'end') && _.has(override, 'bins');
|
||||
};
|
||||
|
||||
Histogram.prototype._shouldOverrideBins = function (override) {
|
||||
return override && _.has(override, 'bins');
|
||||
};
|
||||
|
||||
Histogram.prototype.format = function(result, override) {
|
||||
|
@ -1,14 +1,11 @@
|
||||
var _ = require('underscore');
|
||||
var BaseOverviewsDataview = require('./base');
|
||||
var BaseDataview = require('../histogram');
|
||||
var debug = require('debug')('windshaft:dataview:histogram:overview');
|
||||
|
||||
var dot = require('dot');
|
||||
dot.templateSettings.strip = false;
|
||||
|
||||
var columnTypeQueryTpl = dot.template(
|
||||
'SELECT pg_typeof({{=it.column}})::oid FROM ({{=it.query}}) _cdb_histogram_column_type limit 1'
|
||||
);
|
||||
|
||||
var BIN_MIN_NUMBER = 6;
|
||||
var BIN_MAX_NUMBER = 48;
|
||||
|
||||
@ -18,12 +15,12 @@ var filteredQueryTpl = dot.template([
|
||||
' FROM ({{=it._query}}) _cdb_filtered_source',
|
||||
' WHERE',
|
||||
' {{=it._column}} IS NOT NULL',
|
||||
' AND',
|
||||
' {{?it._isFloatColumn}}AND',
|
||||
' {{=it._column}} != \'infinity\'::float',
|
||||
' AND',
|
||||
' {{=it._column}} != \'-infinity\'::float',
|
||||
' AND',
|
||||
' {{=it._column}} != \'NaN\'::float',
|
||||
' {{=it._column}} != \'NaN\'::float{{?}}',
|
||||
')'
|
||||
].join(' \n'));
|
||||
|
||||
@ -117,8 +114,8 @@ var histogramQueryTpl = dot.template([
|
||||
' (max_val - min_val) / cast(bins_number as float) AS bin_width,',
|
||||
' bins_number,',
|
||||
' nulls_count,',
|
||||
' infinities_count,',
|
||||
' nans_count,',
|
||||
' {{?it._isFloatColumn}}infinities_count,',
|
||||
' nans_count,{{?}}',
|
||||
' avg_val,',
|
||||
' CASE WHEN min_val = max_val',
|
||||
' THEN 0',
|
||||
@ -128,8 +125,9 @@ var histogramQueryTpl = dot.template([
|
||||
' max({{=it._column}})::numeric AS max,',
|
||||
' sum({{=it._column}}*_feature_count)/sum(_feature_count)::numeric AS avg,',
|
||||
' sum(_feature_count) AS freq',
|
||||
'FROM filtered_source, basics, nulls, infinities, nans, bins',
|
||||
'GROUP BY bin, bins_number, bin_width, nulls_count, infinities_count, nans_count, avg_val',
|
||||
'FROM filtered_source, basics, nulls, bins{{?it._isFloatColumn}},infinities, nans{{?}}',
|
||||
'GROUP BY bin, bins_number, bin_width, nulls_count, avg_val',
|
||||
' {{?it._isFloatColumn}}, infinities_count, nans_count{{?}}',
|
||||
'ORDER BY bin'
|
||||
].join('\n'));
|
||||
|
||||
@ -149,36 +147,23 @@ Histogram.prototype.constructor = Histogram;
|
||||
|
||||
module.exports = Histogram;
|
||||
|
||||
|
||||
var DATE_OIDS = {
|
||||
1082: true,
|
||||
1114: true,
|
||||
1184: true
|
||||
};
|
||||
|
||||
Histogram.prototype.sql = function(psql, override, callback) {
|
||||
var self = this;
|
||||
|
||||
if (!callback) {
|
||||
callback = override;
|
||||
override = {};
|
||||
}
|
||||
|
||||
var self = this;
|
||||
|
||||
var _column = this.column;
|
||||
|
||||
var columnTypeQuery = columnTypeQueryTpl({
|
||||
column: _column, query: this.rewrittenQuery(this.queries.no_filters)
|
||||
});
|
||||
|
||||
if (this._columnType === null) {
|
||||
psql.query(columnTypeQuery, function(err, result) {
|
||||
this.getColumnType(psql, this.column, this.queries.no_filters, function (err, type) {
|
||||
// assume numeric, will fail later
|
||||
self._columnType = 'numeric';
|
||||
if (!err && !!result.rows[0]) {
|
||||
var pgType = result.rows[0].pg_typeof;
|
||||
if (DATE_OIDS.hasOwnProperty(pgType)) {
|
||||
self._columnType = 'date';
|
||||
}
|
||||
if (!err && !!type) {
|
||||
self._columnType = Object.keys(type).find(function (key) {
|
||||
return type[key];
|
||||
});
|
||||
}
|
||||
self.sql(psql, override, callback);
|
||||
}, true); // use read-only transaction
|
||||
@ -191,16 +176,24 @@ Histogram.prototype.sql = function(psql, override, callback) {
|
||||
return this.defaultSql(psql, override, callback);
|
||||
}
|
||||
|
||||
var histogramSql = this._buildQuery(override);
|
||||
|
||||
return callback(null, histogramSql);
|
||||
};
|
||||
|
||||
Histogram.prototype._buildQuery = function (override) {
|
||||
var filteredQuery, basicsQuery, binsQuery;
|
||||
var _column = this.column;
|
||||
var _query = this.rewrittenQuery(this.query);
|
||||
|
||||
var filteredQuery, basicsQuery, binsQuery;
|
||||
|
||||
filteredQuery = filteredQueryTpl({
|
||||
_isFloatColumn: this._columnType === 'float',
|
||||
_query: _query,
|
||||
_column: _column
|
||||
});
|
||||
|
||||
if (override && _.has(override, 'start') && _.has(override, 'end') && _.has(override, 'bins')) {
|
||||
if (this._shouldOverride(override)) {
|
||||
debug('overriding with %j', override);
|
||||
basicsQuery = overrideBasicsQueryTpl({
|
||||
_query: _query,
|
||||
_column: _column,
|
||||
@ -219,7 +212,7 @@ Histogram.prototype.sql = function(psql, override, callback) {
|
||||
_column: _column
|
||||
});
|
||||
|
||||
if (override && _.has(override, 'bins')) {
|
||||
if (this._shouldOverrideBins(override)) {
|
||||
binsQuery = [
|
||||
overrideBinsQueryTpl({
|
||||
_bins: override.bins
|
||||
@ -240,17 +233,18 @@ Histogram.prototype.sql = function(psql, override, callback) {
|
||||
}
|
||||
}
|
||||
|
||||
var cteSql = [
|
||||
filteredQuery,
|
||||
basicsQuery,
|
||||
binsQuery,
|
||||
nullsQueryTpl({
|
||||
_query: _query,
|
||||
_column: _column
|
||||
})
|
||||
];
|
||||
|
||||
var histogramSql = [
|
||||
"WITH",
|
||||
[
|
||||
filteredQuery,
|
||||
basicsQuery,
|
||||
binsQuery,
|
||||
nullsQueryTpl({
|
||||
_query: _query,
|
||||
_column: _column
|
||||
}),
|
||||
if (this._columnType === 'float') {
|
||||
cteSql.push(
|
||||
infinitiesQueryTpl({
|
||||
_query: _query,
|
||||
_column: _column
|
||||
@ -259,12 +253,30 @@ Histogram.prototype.sql = function(psql, override, callback) {
|
||||
_query: _query,
|
||||
_column: _column
|
||||
})
|
||||
].join(',\n'),
|
||||
);
|
||||
}
|
||||
|
||||
var histogramSql = [
|
||||
"WITH",
|
||||
cteSql.join(',\n'),
|
||||
histogramQueryTpl({
|
||||
_isFloatColumn: this._columnType === 'float',
|
||||
_query: _query,
|
||||
_column: _column
|
||||
})
|
||||
].join('\n');
|
||||
|
||||
return callback(null, histogramSql);
|
||||
debug(histogramSql);
|
||||
|
||||
return histogramSql;
|
||||
};
|
||||
|
||||
Histogram.prototype._shouldOverride = function (override) {
|
||||
return override && _.has(override, 'start') && _.has(override, 'end') && _.has(override, 'bins');
|
||||
};
|
||||
|
||||
Histogram.prototype._shouldOverrideBins = function (override) {
|
||||
return override && _.has(override, 'bins');
|
||||
};
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user