Handle special float values only if column is float

This commit is contained in:
Daniel García Aubert 2017-06-16 12:57:46 +02:00
parent 20d7f1a7c5
commit b1ac5b8ca9
3 changed files with 120 additions and 94 deletions

View File

@ -33,6 +33,12 @@ var FLOAT_OIDS = {
701: true
};
var DATE_OIDS = {
1082: true,
1114: true,
1184: true
};
var columnTypeQueryTpl = dot.template(
'SELECT pg_typeof({{=it.column}})::oid FROM ({{=it.query}}) _cdb_column_type limit 1'
);
@ -55,6 +61,7 @@ BaseDataview.prototype.getColumnType = function (psql, column, query, callback)
function getPGTypeName (pgType) {
return {
float: FLOAT_OIDS.hasOwnProperty(pgType)
float: FLOAT_OIDS.hasOwnProperty(pgType),
date: DATE_OIDS.hasOwnProperty(pgType)
};
}

View File

@ -5,9 +5,6 @@ var debug = require('debug')('windshaft:dataview:histogram');
var dot = require('dot');
dot.templateSettings.strip = false;
var columnTypeQueryTpl = dot.template(
'SELECT pg_typeof({{=it.column}})::oid FROM ({{=it.query}}) _cdb_histogram_column_type limit 1'
);
var columnCastTpl = dot.template("date_part('epoch', {{=it.column}})");
var BIN_MIN_NUMBER = 6;
@ -19,12 +16,12 @@ var filteredQueryTpl = dot.template([
' FROM ({{=it._query}}) _cdb_filtered_source',
' WHERE',
' {{=it._column}} IS NOT NULL',
' AND',
' {{?it._isFloatColumn}}AND',
' {{=it._column}} != \'infinity\'::float',
' AND',
' {{=it._column}} != \'-infinity\'::float',
' AND',
' {{=it._column}} != \'NaN\'::float',
' {{=it._column}} != \'NaN\'::float{{?}}',
')'
].join(' \n'));
@ -118,8 +115,8 @@ var histogramQueryTpl = dot.template([
' (max_val - min_val) / cast(bins_number as float) AS bin_width,',
' bins_number,',
' nulls_count,',
' infinities_count,',
' nans_count,',
' {{?it._isFloatColumn}}infinities_count,',
' nans_count,{{?}}',
' avg_val,',
' CASE WHEN min_val = max_val',
' THEN 0',
@ -129,8 +126,9 @@ var histogramQueryTpl = dot.template([
' max({{=it._column}})::numeric AS max,',
' avg({{=it._column}})::numeric AS avg,',
' count(*) AS freq',
'FROM filtered_source, basics, nulls, infinities, nans, bins',
'GROUP BY bin, bins_number, bin_width, nulls_count, infinities_count, nans_count, avg_val',
'FROM filtered_source, basics, nulls, bins{{?it._isFloatColumn}}, infinities, nans{{?}}',
'GROUP BY bin, bins_number, bin_width, nulls_count,',
' avg_val{{?it._isFloatColumn}}, infinities_count, nans_count{{?}}',
'ORDER BY bin'
].join('\n'));
@ -164,55 +162,49 @@ Histogram.prototype.constructor = Histogram;
module.exports = Histogram;
var DATE_OIDS = {
1082: true,
1114: true,
1184: true
};
Histogram.prototype.sql = function(psql, override, callback) {
var self = this;
if (!callback) {
callback = override;
override = {};
}
var self = this;
var _column = this.column;
var columnTypeQuery = columnTypeQueryTpl({
column: _column, query: this.queries.no_filters
});
if (this._columnType === null) {
psql.query(columnTypeQuery, function(err, result) {
this.getColumnType(psql, this.column, this.queries.no_filters, function (err, type) {
// assume numeric, will fail later
self._columnType = 'numeric';
if (!err && !!result.rows[0]) {
var pgType = result.rows[0].pg_typeof;
if (DATE_OIDS.hasOwnProperty(pgType)) {
self._columnType = 'date';
}
if (!err && !!type) {
self._columnType = Object.keys(type).find(function (key) {
return type[key];
});
}
self.sql(psql, override, callback);
}, true); // use read-only transaction
return null;
}
var histogramSql = this._buildQuery(override);
return callback(null, histogramSql);
};
Histogram.prototype._buildQuery = function (override) {
var filteredQuery, basicsQuery, binsQuery;
var _column = this.column;
var _query = this.query;
if (this._columnType === 'date') {
_column = columnCastTpl({column: _column});
}
var _query = this.query;
var filteredQuery, basicsQuery, binsQuery;
filteredQuery = filteredQueryTpl({
_isFloatColumn: this._columnType === 'float',
_query: _query,
_column: _column
});
if (override && _.has(override, 'start') && _.has(override, 'end') && _.has(override, 'bins')) {
if (this._shouldOverride(override)) {
debug('overriding with %j', override);
basicsQuery = overrideBasicsQueryTpl({
_query: _query,
@ -232,7 +224,7 @@ Histogram.prototype.sql = function(psql, override, callback) {
_column: _column
});
if (override && _.has(override, 'bins')) {
if (this._shouldOverrideBins(override)) {
binsQuery = [
overrideBinsQueryTpl({
_bins: override.bins
@ -253,17 +245,18 @@ Histogram.prototype.sql = function(psql, override, callback) {
}
}
var histogramSql = [
"WITH",
[
var cteSql = [
filteredQuery,
basicsQuery,
binsQuery,
nullsQueryTpl({
_query: _query,
_column: _column
}),
})
];
if (this._columnType === 'float') {
cteSql.push(
infinitiesQueryTpl({
_query: _query,
_column: _column
@ -272,8 +265,14 @@ Histogram.prototype.sql = function(psql, override, callback) {
_query: _query,
_column: _column
})
].join(',\n'),
);
}
var histogramSql = [
"WITH",
cteSql.join(',\n'),
histogramQueryTpl({
_isFloatColumn: this._columnType === 'float',
_query: _query,
_column: _column
})
@ -281,7 +280,15 @@ Histogram.prototype.sql = function(psql, override, callback) {
debug(histogramSql);
return callback(null, histogramSql);
return histogramSql;
};
Histogram.prototype._shouldOverride = function (override) {
return override && _.has(override, 'start') && _.has(override, 'end') && _.has(override, 'bins');
};
Histogram.prototype._shouldOverrideBins = function (override) {
return override && _.has(override, 'bins');
};
Histogram.prototype.format = function(result, override) {

View File

@ -1,14 +1,11 @@
var _ = require('underscore');
var BaseOverviewsDataview = require('./base');
var BaseDataview = require('../histogram');
var debug = require('debug')('windshaft:dataview:histogram:overview');
var dot = require('dot');
dot.templateSettings.strip = false;
var columnTypeQueryTpl = dot.template(
'SELECT pg_typeof({{=it.column}})::oid FROM ({{=it.query}}) _cdb_histogram_column_type limit 1'
);
var BIN_MIN_NUMBER = 6;
var BIN_MAX_NUMBER = 48;
@ -18,12 +15,12 @@ var filteredQueryTpl = dot.template([
' FROM ({{=it._query}}) _cdb_filtered_source',
' WHERE',
' {{=it._column}} IS NOT NULL',
' AND',
' {{?it._isFloatColumn}}AND',
' {{=it._column}} != \'infinity\'::float',
' AND',
' {{=it._column}} != \'-infinity\'::float',
' AND',
' {{=it._column}} != \'NaN\'::float',
' {{=it._column}} != \'NaN\'::float{{?}}',
')'
].join(' \n'));
@ -117,8 +114,8 @@ var histogramQueryTpl = dot.template([
' (max_val - min_val) / cast(bins_number as float) AS bin_width,',
' bins_number,',
' nulls_count,',
' infinities_count,',
' nans_count,',
' {{?it._isFloatColumn}}infinities_count,',
' nans_count,{{?}}',
' avg_val,',
' CASE WHEN min_val = max_val',
' THEN 0',
@ -128,8 +125,9 @@ var histogramQueryTpl = dot.template([
' max({{=it._column}})::numeric AS max,',
' sum({{=it._column}}*_feature_count)/sum(_feature_count)::numeric AS avg,',
' sum(_feature_count) AS freq',
'FROM filtered_source, basics, nulls, infinities, nans, bins',
'GROUP BY bin, bins_number, bin_width, nulls_count, infinities_count, nans_count, avg_val',
'FROM filtered_source, basics, nulls, bins{{?it._isFloatColumn}},infinities, nans{{?}}',
'GROUP BY bin, bins_number, bin_width, nulls_count, avg_val',
' {{?it._isFloatColumn}}, infinities_count, nans_count{{?}}',
'ORDER BY bin'
].join('\n'));
@ -149,36 +147,23 @@ Histogram.prototype.constructor = Histogram;
module.exports = Histogram;
var DATE_OIDS = {
1082: true,
1114: true,
1184: true
};
Histogram.prototype.sql = function(psql, override, callback) {
var self = this;
if (!callback) {
callback = override;
override = {};
}
var self = this;
var _column = this.column;
var columnTypeQuery = columnTypeQueryTpl({
column: _column, query: this.rewrittenQuery(this.queries.no_filters)
});
if (this._columnType === null) {
psql.query(columnTypeQuery, function(err, result) {
this.getColumnType(psql, this.column, this.queries.no_filters, function (err, type) {
// assume numeric, will fail later
self._columnType = 'numeric';
if (!err && !!result.rows[0]) {
var pgType = result.rows[0].pg_typeof;
if (DATE_OIDS.hasOwnProperty(pgType)) {
self._columnType = 'date';
}
if (!err && !!type) {
self._columnType = Object.keys(type).find(function (key) {
return type[key];
});
}
self.sql(psql, override, callback);
}, true); // use read-only transaction
@ -191,16 +176,24 @@ Histogram.prototype.sql = function(psql, override, callback) {
return this.defaultSql(psql, override, callback);
}
var histogramSql = this._buildQuery(override);
return callback(null, histogramSql);
};
Histogram.prototype._buildQuery = function (override) {
var filteredQuery, basicsQuery, binsQuery;
var _column = this.column;
var _query = this.rewrittenQuery(this.query);
var filteredQuery, basicsQuery, binsQuery;
filteredQuery = filteredQueryTpl({
_isFloatColumn: this._columnType === 'float',
_query: _query,
_column: _column
});
if (override && _.has(override, 'start') && _.has(override, 'end') && _.has(override, 'bins')) {
if (this._shouldOverride(override)) {
debug('overriding with %j', override);
basicsQuery = overrideBasicsQueryTpl({
_query: _query,
_column: _column,
@ -219,7 +212,7 @@ Histogram.prototype.sql = function(psql, override, callback) {
_column: _column
});
if (override && _.has(override, 'bins')) {
if (this._shouldOverrideBins(override)) {
binsQuery = [
overrideBinsQueryTpl({
_bins: override.bins
@ -240,17 +233,18 @@ Histogram.prototype.sql = function(psql, override, callback) {
}
}
var histogramSql = [
"WITH",
[
var cteSql = [
filteredQuery,
basicsQuery,
binsQuery,
nullsQueryTpl({
_query: _query,
_column: _column
}),
})
];
if (this._columnType === 'float') {
cteSql.push(
infinitiesQueryTpl({
_query: _query,
_column: _column
@ -259,12 +253,30 @@ Histogram.prototype.sql = function(psql, override, callback) {
_query: _query,
_column: _column
})
].join(',\n'),
);
}
var histogramSql = [
"WITH",
cteSql.join(',\n'),
histogramQueryTpl({
_isFloatColumn: this._columnType === 'float',
_query: _query,
_column: _column
})
].join('\n');
return callback(null, histogramSql);
debug(histogramSql);
return histogramSql;
};
Histogram.prototype._shouldOverride = function (override) {
return override && _.has(override, 'start') && _.has(override, 'end') && _.has(override, 'bins');
};
Histogram.prototype._shouldOverrideBins = function (override) {
return override && _.has(override, 'bins');
};