Optimize formulae queries

This commit is contained in:
Raul Marin 2017-11-29 12:33:41 +01:00 committed by Raúl Marín
parent dfe01c836c
commit 243e982bd6
2 changed files with 58 additions and 100 deletions

View File

@ -1,34 +1,14 @@
const BaseDataview = require('./base'); const BaseDataview = require('./base');
const debug = require('debug')('windshaft:dataview:formula'); const debug = require('debug')('windshaft:dataview:formula');
const utils = require('../../utils/query-utils');
const countInfinitiesQueryTpl = ctx => ` const formulaQueryTpl = ctx =>
SELECT count(1) FROM (${ctx.query}) __cdb_formula_infinities `SELECT
WHERE ${ctx.column} = 'infinity'::float OR ${ctx.column} = '-infinity'::float ${ctx.operation}(${utils.handleFloatColumn(ctx)}) AS result,
`; ${utils.countNULLs(ctx)} AS nulls_count
${ctx.isFloatColumn ? `,${utils.countInfinites(ctx)} AS infinities_count,` : ``}
const countNansQueryTpl = ctx => ` ${ctx.isFloatColumn ? `${utils.countNaNs(ctx)} AS nans_count` : ``}
SELECT count(1) FROM (${ctx.query}) __cdb_formula_nans FROM (${ctx.query}) __cdb_formula`;
WHERE ${ctx.column} = 'NaN'::float
`;
/**
 * Builds a SQL WHERE clause that rejects the special float values
 * 'infinity', '-infinity' and 'NaN' for the given column.
 *
 * @param {Object} ctx - template context
 * @param {string} ctx.column - column expression interpolated verbatim into the SQL
 * @returns {string} the WHERE clause, wrapped in a leading and a trailing newline
 */
const filterOutSpecialNumericValuesTpl = ctx => {
    const specialValues = ['infinity', '-infinity', 'NaN'];
    const conditions = specialValues.map(value => `${ctx.column} != '${value}'::float`);

    return `\nWHERE\n${conditions.join('\nAND\n')}\n`;
};
/**
 * Builds the SQL for a formula dataview: the aggregated result plus a count
 * of NULL values and, for float columns, counts of infinities and NaNs.
 *
 * For float columns and any operation other than 'count', the clause produced
 * by filterOutSpecialNumericValuesTpl is appended after the FROM.
 *
 * @param {Object} ctx - template context
 * @param {string} ctx.operation - SQL aggregate function name, interpolated verbatim
 * @param {string} ctx.column - column expression, interpolated verbatim
 * @param {string} ctx.query - source query, used as a subquery
 * @param {boolean} ctx.isFloatColumn - whether to add the infinities/NaNs counters
 * @returns {string} the SQL text
 */
const formulaQueryTpl = ctx => {
    const isFloat = ctx.isFloatColumn;

    const infinitiesColumn = isFloat ? `,(${countInfinitiesQueryTpl(ctx)}) AS infinities_count` : '';
    const nansColumn = isFloat ? `,(${countNansQueryTpl(ctx)}) AS nans_count` : '';
    // 'count' keeps every row, so the special-value filter is skipped for it.
    const specialValuesFilter = isFloat && ctx.operation !== 'count' ?
        `${filterOutSpecialNumericValuesTpl(ctx)}` :
        '';

    const sqlLines = [
        '',
        'SELECT',
        `${ctx.operation}(${ctx.column}) AS result,`,
        `(SELECT count(1) FROM (${ctx.query}) _cdb_formula_nulls WHERE ${ctx.column} IS NULL) AS nulls_count`,
        infinitiesColumn,
        nansColumn,
        `FROM (${ctx.query}) __cdb_formula`,
        specialValuesFilter,
        ''
    ];

    return sqlLines.join('\n');
};
const VALID_OPERATIONS = { const VALID_OPERATIONS = {
count: true, count: true,

View File

@ -1,55 +1,38 @@
var BaseOverviewsDataview = require('./base'); var BaseOverviewsDataview = require('./base');
var BaseDataview = require('../formula'); var BaseDataview = require('../formula');
var debug = require('debug')('windshaft:widget:formula:overview'); var debug = require('debug')('windshaft:widget:formula:overview');
const utils = require('../../../utils/query-utils');
var dot = require('dot'); var dot = require('dot');
dot.templateSettings.strip = false; dot.templateSettings.strip = false;
var formulaQueryTpls = { const VALID_OPERATIONS = {
'count': dot.template([ count: true,
'SELECT', sum: true,
'sum(_feature_count) AS result,', avg: true
'(SELECT count(1) FROM ({{=it._query}}) _cdb_formula_nulls WHERE {{=it._column}} IS NULL) AS nulls_count',
'{{?it._isFloatColumn}},(SELECT count(1) FROM ({{=it._query}}) _cdb_formula_infinities',
' WHERE {{=it._column}} = \'infinity\'::float OR {{=it._column}} = \'-infinity\'::float) AS infinities_count,',
'(SELECT count(1) FROM ({{=it._query}}) _cdb_formula_nans',
' WHERE {{=it._column}} = \'NaN\'::float) AS nans_count{{?}}',
'FROM ({{=it._query}}) _cdb_formula'
].join('\n')),
'sum': dot.template([
'SELECT',
'sum({{=it._column}}*_feature_count) AS result,',
'(SELECT count(1) FROM ({{=it._query}}) _cdb_formula_nulls WHERE {{=it._column}} IS NULL) AS nulls_count',
'{{?it._isFloatColumn}},(SELECT count(1) FROM ({{=it._query}}) _cdb_formula_infinities',
' WHERE {{=it._column}} = \'infinity\'::float OR {{=it._column}} = \'-infinity\'::float) AS infinities_count',
',(SELECT count(1) FROM ({{=it._query}}) _cdb_formula_nans',
' WHERE {{=it._column}} = \'NaN\'::float) AS nans_count{{?}}',
'FROM ({{=it._query}}) _cdb_formula',
'{{?it._isFloatColumn}}WHERE',
' {{=it._column}} != \'infinity\'::float',
'AND',
' {{=it._column}} != \'-infinity\'::float',
'AND',
' {{=it._column}} != \'NaN\'::float{{?}}'
].join('\n')),
'avg': dot.template([
'SELECT',
'sum({{=it._column}}*_feature_count)/sum(_feature_count) AS result,',
'(SELECT count(1) FROM ({{=it._query}}) _cdb_formula_nulls WHERE {{=it._column}} IS NULL) AS nulls_count',
'{{?it._isFloatColumn}},(SELECT count(1) FROM ({{=it._query}}) _cdb_formula_infinities',
' WHERE {{=it._column}} = \'infinity\'::float OR {{=it._column}} = \'-infinity\'::float) AS infinities_count',
',(SELECT count(1) FROM ({{=it._query}}) _cdb_formula_nans',
' WHERE {{=it._column}} = \'NaN\'::float) AS nans_count{{?}}',
'FROM ({{=it._query}}) _cdb_formula',
'{{?it._isFloatColumn}}WHERE',
' {{=it._column}} != \'infinity\'::float',
'AND',
' {{=it._column}} != \'-infinity\'::float',
'AND',
' {{=it._column}} != \'NaN\'::float{{?}}'
].join('\n')),
}; };
/**
 * Returns the SQL expression that computes the formula result from an
 * overview table, using its _feature_count column as the weight.
 *
 * - 'count': sum of _feature_count
 * - 'sum':   sum of value * _feature_count
 * - 'avg':   weighted sum divided by the sum of _feature_count
 * - any other operation: the operation applied directly to the value
 *
 * The value expression comes from utils.handleFloatColumn(ctx), which is
 * defined in query-utils and not visible here.
 * NOTE(review): for 'avg' the denominator sums _feature_count over every row;
 * confirm that matches the rows handleFloatColumn keeps in the numerator.
 *
 * @param {Object} ctx - template context
 * @param {string} ctx.operation - requested aggregate operation
 * @returns {string} SQL expression for the result column
 */
function dataviewResult(ctx) {
    const operation = ctx.operation;

    if (operation === 'count') {
        return 'sum(_feature_count)';
    }

    const value = utils.handleFloatColumn(ctx);

    if (operation === 'sum') {
        return `sum(${value}*_feature_count)`;
    }
    if (operation === 'avg') {
        // The trailing space is part of the original output and is preserved.
        return `sum(${value}*_feature_count)/sum(_feature_count) `;
    }

    return `${operation}(${value})`;
}
/**
 * Builds the SQL for a formula dataview computed over an overview table.
 *
 * The SELECT list holds the result expression from dataviewResult(ctx), a
 * NULLs counter and, for float columns only, infinities and NaNs counters.
 * The counter expressions come from utils (query-utils), not visible here.
 *
 * @param {Object} ctx - template context
 * @param {string} ctx.query - source query, used as a subquery
 * @param {boolean} ctx.isFloatColumn - whether to add the infinities/NaNs counters
 * @returns {string} the SQL text
 */
const formulaQueryTpl = ctx => {
    const sqlLines = [
        'SELECT',
        `${dataviewResult(ctx)} AS result,`,
        `${utils.countNULLs(ctx)} AS nulls_count`,
        // Leading and trailing commas tie this column to its neighbours in the SELECT list.
        ctx.isFloatColumn ? `,${utils.countInfinites(ctx)} AS infinities_count,` : '',
        ctx.isFloatColumn ? `${utils.countNaNs(ctx)} AS nans_count` : '',
        `FROM (${ctx.query}) __cdb_formula`
    ];

    return sqlLines.join('\n');
};
function Formula(query, options, queryRewriter, queryRewriteData, params, queries) { function Formula(query, options, queryRewriter, queryRewriteData, params, queries) {
BaseOverviewsDataview.call(this, query, options, BaseDataview, queryRewriter, queryRewriteData, params, queries); BaseOverviewsDataview.call(this, query, options, BaseDataview, queryRewriter, queryRewriteData, params, queries);
this.column = options.column || '1'; this.column = options.column || '1';
@ -65,36 +48,31 @@ module.exports = Formula;
Formula.prototype.sql = function (psql, override, callback) { Formula.prototype.sql = function (psql, override, callback) {
var self = this; var self = this;
var formulaQueryTpl = formulaQueryTpls[this.operation]; if (!VALID_OPERATIONS[this.operation]) {
return this.defaultSql(psql, override, callback);
if (formulaQueryTpl) {
// supported formula for use with overviews
if (this._isFloatColumn === null) {
this._isFloatColumn = false;
this.getColumnType(psql, this.column, this.queries.no_filters, function (err, type) {
if (!err && !!type) {
self._isFloatColumn = type.float;
}
self.sql(psql, override, callback);
});
return null;
}
var formulaSql = formulaQueryTpl({
_isFloatColumn: this._isFloatColumn,
_query: this.rewrittenQuery(this.query),
_operation: this.operation,
_column: this.column
});
callback = callback || override;
debug(formulaSql);
return callback(null, formulaSql);
} }
if (this._isFloatColumn === null) {
this._isFloatColumn = false;
this.getColumnType(psql, this.column, this.queries.no_filters, function (err, type) {
if (!err && !!type) {
self._isFloatColumn = type.float;
}
self.sql(psql, override, callback);
});
return null;
}
// default behaviour var formulaSql = formulaQueryTpl({
return this.defaultSql(psql, override, callback); isFloatColumn: this._isFloatColumn,
query: this.rewrittenQuery(this.query),
operation: this.operation,
column: this.column
});
callback = callback || override;
debug(formulaSql);
return callback(null, formulaSql);
}; };