Merge branch 'master' into mvt-path-conf

This commit is contained in:
David Manzanares 2017-10-05 12:18:08 +02:00
commit 148e6e6ae5
10 changed files with 1051 additions and 1094 deletions

View File

@ -5,7 +5,7 @@ Make sure that you have the requirements needed. These are
- Core
- Node.js >=6.9.x
- yarn >=0.21.3
- yarn >=0.27.5 <1.0.0
- PostgreSQL >8.3.x, PostGIS >1.5.x
- Redis >2.4.0 (http://www.redis.io)
- Mapnik >3.x. See [Installing Mapnik](https://github.com/CartoDB/Windshaft#installing-mapnik).

12
NEWS.md
View File

@ -1,13 +1,19 @@
# Changelog
## 3.13.0
## 3.13.1
Released 2017-mm-dd
- Upgrades camshaft, cartodb-query-tables, and turbo-carto: better support for query variables.
- Upgrades yarn minimum version requirement to v0.27.5
Bugfixes:
-
## 3.13.0
Released 2017-10-02
- Upgrades camshaft, cartodb-query-tables, and turbo-carto: better support for query variables.
Bugfixes:
- Bounding box parameter ignored in static named maps #735.
- camhaft 0.59.1 fixes duplicate columns in aggregate-intersection analysis
## 3.12.10
Released 2017-09-18

View File

@ -1,95 +1,178 @@
var _ = require('underscore');
var BaseWidget = require('./base');
var debug = require('debug')('windshaft:widget:aggregation');
const BaseDataview = require('./base');
const debug = require('debug')('windshaft:dataview:aggregation');
var dot = require('dot');
dot.templateSettings.strip = false;
const filteredQueryTpl = ctx => `
filtered_source AS (
SELECT *
FROM (${ctx.query}) _cdb_filtered_source
${ctx.aggregationColumn && ctx.isFloatColumn ? `
WHERE
${ctx.aggregationColumn} != 'infinity'::float
AND
${ctx.aggregationColumn} != '-infinity'::float
AND
${ctx.aggregationColumn} != 'NaN'::float` :
''
}
)
`;
var filteredQueryTpl = dot.template([
'filtered_source AS (',
' SELECT *',
' FROM ({{=it._query}}) _cdb_filtered_source',
' {{?it._aggregationColumn && it._isFloatColumn}}WHERE',
' {{=it._aggregationColumn}} != \'infinity\'::float',
' AND',
' {{=it._aggregationColumn}} != \'-infinity\'::float',
' AND',
' {{=it._aggregationColumn}} != \'NaN\'::float{{?}}',
')'
].join(' \n'));
const summaryQueryTpl = ctx => `
summary AS (
SELECT
count(1) AS count,
sum(CASE WHEN ${ctx.column} IS NULL THEN 1 ELSE 0 END) AS nulls_count
${ctx.isFloatColumn ? `,
sum(
CASE
WHEN ${ctx.aggregationColumn} = 'infinity'::float OR ${ctx.aggregationColumn} = '-infinity'::float
THEN 1
ELSE 0
END
) AS infinities_count,
sum(CASE WHEN ${ctx.aggregationColumn} = 'NaN'::float THEN 1 ELSE 0 END) AS nans_count` :
''
}
FROM (${ctx.query}) _cdb_aggregation_nulls
)
`;
var summaryQueryTpl = dot.template([
'summary AS (',
' SELECT',
' count(1) AS count,',
' sum(CASE WHEN {{=it._column}} IS NULL THEN 1 ELSE 0 END) AS nulls_count',
' {{?it._isFloatColumn}},sum(',
' CASE',
' WHEN {{=it._aggregationColumn}} = \'infinity\'::float OR {{=it._aggregationColumn}} = \'-infinity\'::float',
' THEN 1',
' ELSE 0',
' END',
' ) AS infinities_count,',
' sum(CASE WHEN {{=it._aggregationColumn}} = \'NaN\'::float THEN 1 ELSE 0 END) AS nans_count{{?}}',
' FROM ({{=it._query}}) _cdb_aggregation_nulls',
')'
].join('\n'));
const rankedCategoriesQueryTpl = ctx => `
categories AS(
SELECT
${ctx.column} AS category,
${ctx.aggregationFn} AS value,
row_number() OVER (ORDER BY ${ctx.aggregationFn} desc) as rank
FROM filtered_source
${ctx.aggregationColumn !== null ? `WHERE ${ctx.aggregationColumn} IS NOT NULL` : ''}
GROUP BY ${ctx.column}
ORDER BY 2 DESC
)
`;
var rankedCategoriesQueryTpl = dot.template([
'categories AS(',
' SELECT {{=it._column}} AS category, {{=it._aggregation}} AS value,',
' row_number() OVER (ORDER BY {{=it._aggregation}} desc) as rank',
' FROM filtered_source',
' {{?it._aggregationColumn!==null}}WHERE {{=it._aggregationColumn}} IS NOT NULL{{?}}',
' GROUP BY {{=it._column}}',
' ORDER BY 2 DESC',
')'
].join('\n'));
const categoriesSummaryMinMaxQueryTpl = () => `
categories_summary_min_max AS(
SELECT
max(value) max_val,
min(value) min_val
FROM categories
)
`;
var categoriesSummaryMinMaxQueryTpl = dot.template([
'categories_summary_min_max AS(',
' SELECT max(value) max_val, min(value) min_val',
' FROM categories',
')'
].join('\n'));
const categoriesSummaryCountQueryTpl = ctx => `
categories_summary_count AS(
SELECT count(1) AS categories_count
FROM (
SELECT ${ctx.column} AS category
FROM (${ctx.query}) _cdb_categories
GROUP BY ${ctx.column}
) _cdb_categories_count
)
`;
var categoriesSummaryCountQueryTpl = dot.template([
'categories_summary_count AS(',
' SELECT count(1) AS categories_count',
' FROM (',
' SELECT {{=it._column}} AS category',
' FROM ({{=it._query}}) _cdb_categories',
' GROUP BY {{=it._column}}',
' ) _cdb_categories_count',
')'
].join('\n'));
const specialNumericValuesColumns = () => `, nans_count, infinities_count`;
var rankedAggregationQueryTpl = dot.template([
'SELECT CAST(category AS text), value, false as agg, nulls_count, min_val, max_val,',
' count, categories_count{{?it._isFloatColumn}}, nans_count, infinities_count{{?}}',
' FROM categories, summary, categories_summary_min_max, categories_summary_count',
' WHERE rank < {{=it._limit}}',
'UNION ALL',
'SELECT \'Other\' category, {{=it._aggregationFn}}(value) as value, true as agg, nulls_count,',
' min_val, max_val, count, categories_count{{?it._isFloatColumn}}, nans_count, infinities_count{{?}}',
' FROM categories, summary, categories_summary_min_max, categories_summary_count',
' WHERE rank >= {{=it._limit}}',
'GROUP BY nulls_count, min_val, max_val, count,',
' categories_count{{?it._isFloatColumn}}, nans_count, infinities_count{{?}}'
].join('\n'));
const rankedAggregationQueryTpl = ctx => `
SELECT
CAST(category AS text),
value,
false as agg,
nulls_count,
min_val,
max_val,
count,
categories_count
${ctx.isFloatColumn ? `${specialNumericValuesColumns(ctx)}` : '' }
FROM categories, summary, categories_summary_min_max, categories_summary_count
WHERE rank < ${ctx.limit}
UNION ALL
SELECT
'Other' category,
${ctx.aggregation !== 'count' ? ctx.aggregation : 'sum'}(value) as value,
true as agg,
nulls_count,
min_val,
max_val,
count,
categories_count
${ctx.isFloatColumn ? `${specialNumericValuesColumns(ctx)}` : '' }
FROM categories, summary, categories_summary_min_max, categories_summary_count
WHERE rank >= ${ctx.limit}
GROUP BY
nulls_count,
min_val,
max_val,
count,
categories_count
${ctx.isFloatColumn ? `${specialNumericValuesColumns(ctx)}` : '' }
`;
var aggregationQueryTpl = dot.template([
'SELECT CAST({{=it._column}} AS text) AS category, {{=it._aggregation}} AS value, false as agg,',
' nulls_count, min_val, max_val, count, categories_count{{?it._isFloatColumn}}, nans_count, infinities_count{{?}}',
'FROM ({{=it._query}}) _cdb_aggregation_all, summary, categories_summary_min_max, categories_summary_count',
'GROUP BY category, nulls_count, min_val, max_val, count,',
' categories_count{{?it._isFloatColumn}}, nans_count, infinities_count{{?}}',
'ORDER BY value DESC'
].join('\n'));
const aggregationQueryTpl = ctx => `
SELECT
CAST(${ctx.column} AS text) AS category,
${ctx.aggregationFn} AS value,
false as agg,
nulls_count,
min_val,
max_val,
count,
categories_count
${ctx.isFloatColumn ? `${specialNumericValuesColumns(ctx)}` : '' }
FROM (${ctx.query}) _cdb_aggregation_all, summary, categories_summary_min_max, categories_summary_count
GROUP BY
category,
nulls_count,
min_val,
max_val,
count,
categories_count
${ctx.isFloatColumn ? `${specialNumericValuesColumns(ctx)}` : '' }
ORDER BY value DESC
`;
var CATEGORIES_LIMIT = 6;
const aggregationFnQueryTpl = ctx => `${ctx.aggregation}(${ctx.aggregationColumn})`;
var VALID_OPERATIONS = {
const aggregationDataviewQueryTpl = ctx => `
WITH
${filteredQueryTpl(ctx)},
${summaryQueryTpl(ctx)},
${rankedCategoriesQueryTpl(ctx)},
${categoriesSummaryMinMaxQueryTpl(ctx)},
${categoriesSummaryCountQueryTpl(ctx)}
${!!ctx.override.ownFilter ? `${aggregationQueryTpl(ctx)}` : `${rankedAggregationQueryTpl(ctx)}`}
`;
const filterCategoriesQueryTpl = ctx => `
SELECT
${ctx.column} AS category,
${ctx.value} AS value
FROM (${ctx.query}) _cdb_aggregation_search
WHERE CAST(${ctx.column} as text) ILIKE ${ctx.userQuery}
GROUP BY ${ctx.column}
`;
const searchQueryTpl = ctx => `
WITH
search_unfiltered AS (
${ctx.searchUnfiltered}
),
search_filtered AS (
${ctx.searchFiltered}
),
search_union AS (
SELECT * FROM search_unfiltered
UNION ALL
SELECT * FROM search_filtered
)
SELECT category, sum(value) AS value
FROM search_union
GROUP BY category
ORDER BY value desc
`;
const CATEGORIES_LIMIT = 6;
const VALID_OPERATIONS = {
count: [],
sum: ['aggregationColumn'],
avg: ['aggregationColumn'],
@ -97,7 +180,7 @@ var VALID_OPERATIONS = {
max: ['aggregationColumn']
};
var TYPE = 'aggregation';
const TYPE = 'aggregation';
/**
{
@ -108,256 +191,150 @@ var TYPE = 'aggregation';
}
}
*/
function Aggregation(query, options, queries) {
if (!_.isString(options.column)) {
throw new Error('Aggregation expects `column` in widget options');
module.exports = class Aggregation extends BaseDataview {
constructor (query, options = {}, queries = {}) {
super();
this._checkOptions(options);
this.query = query;
this.queries = queries;
this.column = options.column;
this.aggregation = options.aggregation;
this.aggregationColumn = options.aggregationColumn;
this._isFloatColumn = null;
}
if (!_.isString(options.aggregation)) {
throw new Error('Aggregation expects `aggregation` operation in widget options');
}
if (!VALID_OPERATIONS[options.aggregation]) {
throw new Error("Aggregation does not support '" + options.aggregation + "' operation");
}
var requiredOptions = VALID_OPERATIONS[options.aggregation];
var missingOptions = _.difference(requiredOptions, Object.keys(options));
if (missingOptions.length > 0) {
throw new Error(
"Aggregation '" + options.aggregation + "' is missing some options: " + missingOptions.join(',')
);
}
BaseWidget.apply(this);
this.query = query;
this.queries = queries;
this.column = options.column;
this.aggregation = options.aggregation;
this.aggregationColumn = options.aggregationColumn;
this._isFloatColumn = null;
}
Aggregation.prototype = new BaseWidget();
Aggregation.prototype.constructor = Aggregation;
module.exports = Aggregation;
Aggregation.prototype.sql = function(psql, override, callback) {
var self = this;
if (!callback) {
callback = override;
override = {};
}
if (this.aggregationColumn && this._isFloatColumn === null) {
this._isFloatColumn = false;
this.getColumnType(psql, this.aggregationColumn, this.queries.no_filters, function (err, type) {
if (!err && !!type) {
self._isFloatColumn = type.float;
}
self.sql(psql, override, callback);
});
return null;
}
var _query = this.query;
var aggregationSql;
if (!!override.ownFilter) {
aggregationSql = [
this.getCategoriesCTESql(
_query,
this.column,
this.aggregation,
this.aggregationColumn,
this._isFloatColumn
),
aggregationQueryTpl({
_isFloatColumn: this._isFloatColumn,
_query: _query,
_column: this.column,
_aggregation: this.getAggregationSql(),
_limit: CATEGORIES_LIMIT
})
].join('\n');
} else {
aggregationSql = [
this.getCategoriesCTESql(
_query,
this.column,
this.aggregation,
this.aggregationColumn,
this._isFloatColumn
),
rankedAggregationQueryTpl({
_isFloatColumn: this._isFloatColumn,
_query: _query,
_column: this.column,
_aggregationFn: this.aggregation !== 'count' ? this.aggregation : 'sum',
_limit: CATEGORIES_LIMIT
})
].join('\n');
}
debug(aggregationSql);
return callback(null, aggregationSql);
};
Aggregation.prototype.getCategoriesCTESql = function(query, column, aggregation, aggregationColumn, isFloatColumn) {
return [
"WITH",
[
filteredQueryTpl({
_isFloatColumn: isFloatColumn,
_query: this.query,
_column: this.column,
_aggregationColumn: aggregation !== 'count' ? aggregationColumn : null
}),
summaryQueryTpl({
_isFloatColumn: isFloatColumn,
_query: query,
_column: column,
_aggregationColumn: aggregation !== 'count' ? aggregationColumn : null
}),
rankedCategoriesQueryTpl({
_query: query,
_column: column,
_aggregation: this.getAggregationSql(),
_aggregationColumn: aggregation !== 'count' ? aggregationColumn : null
}),
categoriesSummaryMinMaxQueryTpl({
_query: query,
_column: column
}),
categoriesSummaryCountQueryTpl({
_query: query,
_column: column
})
].join(',\n')
].join('\n');
};
var aggregationFnQueryTpl = dot.template('{{=it._aggregationFn}}({{=it._aggregationColumn}})');
Aggregation.prototype.getAggregationSql = function() {
return aggregationFnQueryTpl({
_aggregationFn: this.aggregation,
_aggregationColumn: this.aggregationColumn || 1
});
};
Aggregation.prototype.format = function(result) {
var categories = [];
var count = 0;
var nulls = 0;
var nans = 0;
var infinities = 0;
var minValue = 0;
var maxValue = 0;
var categoriesCount = 0;
if (result.rows.length) {
var firstRow = result.rows[0];
count = firstRow.count;
nulls = firstRow.nulls_count;
nans = firstRow.nans_count;
infinities = firstRow.infinities_count;
minValue = firstRow.min_val;
maxValue = firstRow.max_val;
categoriesCount = firstRow.categories_count;
result.rows.forEach(function(row) {
categories.push(_.omit(row, 'count', 'nulls_count', 'min_val',
'max_val', 'categories_count', 'nans_count', 'infinities_count'));
});
}
return {
aggregation: this.aggregation,
count: count,
nulls: nulls,
nans: nans,
infinities: infinities,
min: minValue,
max: maxValue,
categoriesCount: categoriesCount,
categories: categories
};
};
var filterCategoriesQueryTpl = dot.template([
'SELECT {{=it._column}} AS category, {{=it._value}} AS value',
'FROM ({{=it._query}}) _cdb_aggregation_search',
'WHERE CAST({{=it._column}} as text) ILIKE {{=it._userQuery}}',
'GROUP BY {{=it._column}}'
].join('\n'));
var searchQueryTpl = dot.template([
'WITH',
'search_unfiltered AS (',
' {{=it._searchUnfiltered}}',
'),',
'search_filtered AS (',
' {{=it._searchFiltered}}',
'),',
'search_union AS (',
' SELECT * FROM search_unfiltered',
' UNION ALL',
' SELECT * FROM search_filtered',
')',
'SELECT category, sum(value) AS value',
'FROM search_union',
'GROUP BY category',
'ORDER BY value desc'
].join('\n'));
Aggregation.prototype.search = function(psql, userQuery, callback) {
var self = this;
var _userQuery = psql.escapeLiteral('%' + userQuery + '%');
var _value = this.aggregation !== 'count' && this.aggregationColumn ?
this.aggregation + '(' + this.aggregationColumn + ')' : 'count(1)';
// TODO unfiltered will be wrong as filters are already applied at this point
var query = searchQueryTpl({
_searchUnfiltered: filterCategoriesQueryTpl({
_query: this.query,
_column: this.column,
_value: '0',
_userQuery: _userQuery
}),
_searchFiltered: filterCategoriesQueryTpl({
_query: this.query,
_column: this.column,
_value: _value,
_userQuery: _userQuery
})
});
psql.query(query, function(err, result) {
if (err) {
return callback(err, result);
_checkOptions (options) {
if (typeof options.column !== 'string') {
throw new Error(`Aggregation expects 'column' in dataview options`);
}
return callback(null, {type: self.getType(), categories: result.rows });
}, true); // use read-only transaction
};
if (typeof options.aggregation !== 'string') {
throw new Error(`Aggregation expects 'aggregation' operation in dataview options`);
}
Aggregation.prototype.getType = function() {
return TYPE;
};
if (!VALID_OPERATIONS[options.aggregation]) {
throw new Error(`Aggregation does not support '${options.aggregation}' operation`);
}
Aggregation.prototype.toString = function() {
return JSON.stringify({
_type: TYPE,
_query: this.query,
_column: this.column,
_aggregation: this.aggregation
});
const requiredOptions = VALID_OPERATIONS[options.aggregation];
const missingOptions = requiredOptions.filter(requiredOption => !options.hasOwnProperty(requiredOption));
if (missingOptions.length > 0) {
throw new Error(
`Aggregation '${options.aggregation}' is missing some options: ${missingOptions.join(',')}`
);
}
}
sql (psql, override, callback) {
if (!callback) {
callback = override;
override = {};
}
if (this._shouldCheckColumnType()) {
this._isFloatColumn = false;
this.getColumnType(psql, this.aggregationColumn, this.queries.no_filters, (err, type) => {
if (!err && !!type) {
this._isFloatColumn = type.float;
}
this.sql(psql, override, callback);
});
return null;
}
const aggregationSql = aggregationDataviewQueryTpl({
override: override,
query: this.query,
column: this.column,
aggregation: this.aggregation,
aggregationColumn: this.aggregation !== 'count' ? this.aggregationColumn : null,
aggregationFn: aggregationFnQueryTpl({
aggregation: this.aggregation,
aggregationColumn: this.aggregationColumn || 1
}),
isFloatColumn: this._isFloatColumn,
limit: CATEGORIES_LIMIT
});
debug(aggregationSql);
return callback(null, aggregationSql);
}
_shouldCheckColumnType () {
return this.aggregationColumn && this._isFloatColumn === null;
}
format (result) {
const {
count = 0,
nulls_count = 0,
nans_count = 0,
infinities_count = 0,
min_val = 0,
max_val = 0,
categories_count = 0
} = result.rows[0] || {};
return {
aggregation: this.aggregation,
count: count,
nulls: nulls_count,
nans: nans_count,
infinities: infinities_count,
min: min_val,
max: max_val,
categoriesCount: categories_count,
categories: result.rows.map(({ category, value, agg }) => ({ category, value, agg }))
};
}
search (psql, userQuery, callback) {
const escapedUserQuery = psql.escapeLiteral(`%${userQuery}%`);
const value = this.aggregation !== 'count' && this.aggregationColumn ?
`${this.aggregation}(${this.aggregationColumn})` :
'count(1)';
// TODO unfiltered will be wrong as filters are already applied at this point
const query = searchQueryTpl({
searchUnfiltered: filterCategoriesQueryTpl({
query: this.query,
column: this.column,
value: '0',
userQuery: escapedUserQuery
}),
searchFiltered: filterCategoriesQueryTpl({
query: this.query,
column: this.column,
value: value,
userQuery: escapedUserQuery
})
});
debug(query);
psql.query(query, (err, result) => {
if (err) {
return callback(err, result);
}
return callback(null, {type: this.getType(), categories: result.rows });
}, true); // use read-only transaction
}
getType () {
return TYPE;
}
toString () {
return JSON.stringify({
_type: TYPE,
_query: this.query,
_column: this.column,
_aggregation: this.aggregation
});
}
};

View File

@ -1,717 +1,72 @@
var _ = require('underscore');
var BaseWidget = require('./base');
var debug = require('debug')('windshaft:dataview:histogram');
const debug = require('debug')('windshaft:dataview:histogram');
const NumericHistogram = require('./histograms/numeric-histogram');
const DateHistogram = require('./histograms/date-histogram');
var dot = require('dot');
dot.templateSettings.strip = false;
const DATE_HISTOGRAM = 'DateHistogram';
const NUMERIC_HISTOGRAM = 'NumericHistogram';
var columnCastTpl = dot.template("date_part('epoch', {{=it.column}})");
module.exports = class Histogram {
constructor (query, options, queries) {
this.query = query;
this.options = options || {};
this.queries = queries;
var dateIntervalQueryTpl = dot.template([
'WITH',
'__cdb_dates AS (',
' SELECT',
' MAX({{=it.column}}::timestamp) AS __cdb_end,',
' MIN({{=it.column}}::timestamp) AS __cdb_start',
' FROM ({{=it.query}}) __cdb_source',
'),',
'__cdb_interval_in_days AS (',
' SELECT' ,
' DATE_PART(\'day\', __cdb_end - __cdb_start) AS __cdb_days',
' FROM __cdb_dates',
'),',
'__cdb_interval_in_hours AS (',
' SELECT',
' __cdb_days * 24 + DATE_PART(\'hour\', __cdb_end - __cdb_start) AS __cdb_hours',
' FROM __cdb_interval_in_days, __cdb_dates',
'),',
'__cdb_interval_in_minutes AS (',
' SELECT',
' __cdb_hours * 60 + DATE_PART(\'minute\', __cdb_end - __cdb_start) AS __cdb_minutes',
' FROM __cdb_interval_in_hours, __cdb_dates',
'),',
'__cdb_interval_in_seconds AS (',
' SELECT',
' __cdb_minutes * 60 + DATE_PART(\'second\', __cdb_end - __cdb_start) AS __cdb_seconds',
' FROM __cdb_interval_in_minutes, __cdb_dates',
')',
'SELECT',
' ROUND(__cdb_days / 365) AS year,',
' ROUND(__cdb_days / 90) AS quarter,',
' ROUND(__cdb_days / 30) AS month,',
' ROUND(__cdb_days / 7) AS week,',
' __cdb_days AS day,',
' __cdb_hours AS hour,',
' __cdb_minutes AS minute,',
' __cdb_seconds AS second',
'FROM __cdb_interval_in_days, __cdb_interval_in_hours, __cdb_interval_in_minutes, __cdb_interval_in_seconds'
].join('\n'));
var MAX_INTERVAL_VALUE = 366;
var BIN_MIN_NUMBER = 6;
var BIN_MAX_NUMBER = 48;
var filteredQueryTpl = dot.template([
'__cdb_filtered_source AS (',
' SELECT *',
' FROM ({{=it._query}}) __cdb_filtered_source_query',
' WHERE',
' {{=it._column}} IS NOT NULL',
' {{?it._isFloatColumn}}AND',
' {{=it._column}} != \'infinity\'::float',
' AND',
' {{=it._column}} != \'-infinity\'::float',
' AND',
' {{=it._column}} != \'NaN\'::float{{?}}',
')'
].join(' \n'));
var basicsQueryTpl = dot.template([
'__cdb_basics AS (',
' SELECT',
' max({{=it._column}}) AS __cdb_max_val, min({{=it._column}}) AS __cdb_min_val,',
' avg({{=it._column}}) AS __cdb_avg_val, count(1) AS __cdb_total_rows',
' FROM __cdb_filtered_source',
')'
].join(' \n'));
var overrideBasicsQueryTpl = dot.template([
'__cdb_basics AS (',
' SELECT',
' max({{=it._end}}) AS __cdb_max_val, min({{=it._start}}) AS __cdb_min_val,',
' avg({{=it._column}}) AS __cdb_avg_val, count(1) AS __cdb_total_rows',
' FROM __cdb_filtered_source',
')'
].join('\n'));
var iqrQueryTpl = dot.template([
'__cdb_iqrange AS (',
' SELECT max(quartile_max) - min(quartile_max) AS __cdb_iqr',
' FROM (',
' SELECT quartile, max(_cdb_iqr_column) AS quartile_max from (',
' SELECT {{=it._column}} AS _cdb_iqr_column, ntile(4) over (order by {{=it._column}}',
' ) AS quartile',
' FROM __cdb_filtered_source) _cdb_quartiles',
' WHERE quartile = 1 or quartile = 3',
' GROUP BY quartile',
' ) __cdb_iqr',
')'
].join('\n'));
var binsQueryTpl = dot.template([
'__cdb_bins AS (',
' SELECT CASE WHEN __cdb_total_rows = 0 OR __cdb_iqr = 0',
' THEN 1',
' ELSE GREATEST(',
' LEAST({{=it._minBins}}, CAST(__cdb_total_rows AS INT)),',
' LEAST(',
' CAST(((__cdb_max_val - __cdb_min_val) / (2 * __cdb_iqr * power(__cdb_total_rows, 1/3))) AS INT),',
' {{=it._maxBins}}',
' )',
' )',
' END AS __cdb_bins_number',
' FROM __cdb_basics, __cdb_iqrange, __cdb_filtered_source',
' LIMIT 1',
')'
].join('\n'));
var overrideBinsQueryTpl = dot.template([
'__cdb_bins AS (',
' SELECT {{=it._bins}} AS __cdb_bins_number',
')'
].join('\n'));
var nullsQueryTpl = dot.template([
'__cdb_nulls AS (',
' SELECT',
' count(*) AS __cdb_nulls_count',
' FROM ({{=it._query}}) __cdb_histogram_nulls',
' WHERE {{=it._column}} IS NULL',
')'
].join('\n'));
var infinitiesQueryTpl = dot.template([
'__cdb_infinities AS (',
' SELECT',
' count(*) AS __cdb_infinities_count',
' FROM ({{=it._query}}) __cdb_infinities_query',
' WHERE',
' {{=it._column}} = \'infinity\'::float',
' OR',
' {{=it._column}} = \'-infinity\'::float',
')'
].join('\n'));
var nansQueryTpl = dot.template([
'__cdb_nans AS (',
' SELECT',
' count(*) AS __cdb_nans_count',
' FROM ({{=it._query}}) __cdb_nans_query',
' WHERE {{=it._column}} = \'NaN\'::float',
')'
].join('\n'));
var histogramQueryTpl = dot.template([
'SELECT',
' (__cdb_max_val - __cdb_min_val) / cast(__cdb_bins_number as float) AS bin_width,',
' __cdb_bins_number AS bins_number,',
' __cdb_nulls_count AS nulls_count,',
' {{?it._isFloatColumn}}__cdb_infinities_count AS infinities_count,',
' __cdb_nans_count AS nans_count,{{?}}',
' __cdb_avg_val AS avg_val,',
' CASE WHEN __cdb_min_val = __cdb_max_val',
' THEN 0',
' ELSE GREATEST(',
' 1,',
' LEAST(',
' WIDTH_BUCKET({{=it._column}}, __cdb_min_val, __cdb_max_val, __cdb_bins_number),',
' __cdb_bins_number',
' )',
' ) - 1',
' END AS bin,',
' min({{=it._column}})::numeric AS min,',
' max({{=it._column}})::numeric AS max,',
' avg({{=it._column}})::numeric AS avg,',
' count(*) AS freq',
'FROM __cdb_filtered_source, __cdb_basics, __cdb_nulls,',
' __cdb_bins{{?it._isFloatColumn}}, __cdb_infinities, __cdb_nans{{?}}',
'GROUP BY bin, bins_number, bin_width, nulls_count,',
' avg_val{{?it._isFloatColumn}}, infinities_count, nans_count{{?}}',
'ORDER BY bin'
].join('\n'));
var dateBasicsQueryTpl = dot.template([
'__cdb_basics AS (',
' SELECT',
' max(date_part(\'epoch\', {{=it._column}})) AS __cdb_max_val,',
' min(date_part(\'epoch\', {{=it._column}})) AS __cdb_min_val,',
' avg(date_part(\'epoch\', {{=it._column}})) AS __cdb_avg_val,',
' min(date_trunc(',
' \'{{=it._aggregation}}\', {{=it._column}}::timestamp AT TIME ZONE \'{{=it._offset}}\'',
' )) AS __cdb_start_date,',
' max({{=it._column}}::timestamp AT TIME ZONE \'{{=it._offset}}\') AS __cdb_end_date,',
' count(1) AS __cdb_total_rows',
' FROM ({{=it._query}}) __cdb_basics_query',
')'
].join(' \n'));
var dateOverrideBasicsQueryTpl = dot.template([
'__cdb_basics AS (',
' SELECT',
' max({{=it._end}})::float AS __cdb_max_val,',
' min({{=it._start}})::float AS __cdb_min_val,',
' avg(date_part(\'epoch\', {{=it._column}})) AS __cdb_avg_val,',
' min(',
' date_trunc(',
' \'{{=it._aggregation}}\',',
' TO_TIMESTAMP({{=it._start}})::timestamp AT TIME ZONE \'{{=it._offset}}\'',
' )',
' ) AS __cdb_start_date,',
' max(',
' TO_TIMESTAMP({{=it._end}})::timestamp AT TIME ZONE \'{{=it._offset}}\'',
' ) AS __cdb_end_date,',
' count(1) AS __cdb_total_rows',
' FROM ({{=it._query}}) __cdb_basics_query',
')'
].join(' \n'));
var dateBinsQueryTpl = dot.template([
'__cdb_bins AS (',
' SELECT',
' __cdb_bins_array,',
' ARRAY_LENGTH(__cdb_bins_array, 1) AS __cdb_bins_number',
' FROM (',
' SELECT',
' ARRAY(',
' SELECT GENERATE_SERIES(',
' __cdb_start_date::timestamptz,',
' __cdb_end_date::timestamptz,',
' {{?it._aggregation==="quarter"}}\'3 month\'{{??}}\'1 {{=it._aggregation}}\'{{?}}::interval',
' )',
' ) AS __cdb_bins_array',
' FROM __cdb_basics',
' ) __cdb_bins_array_query',
')'
].join('\n'));
var dateHistogramQueryTpl = dot.template([
'SELECT',
' (__cdb_max_val - __cdb_min_val) / cast(__cdb_bins_number as float) AS bin_width,',
' __cdb_bins_number AS bins_number,',
' __cdb_nulls_count AS nulls_count,',
' CASE WHEN __cdb_min_val = __cdb_max_val',
' THEN 0',
' ELSE GREATEST(1, LEAST(',
' WIDTH_BUCKET(',
' {{=it._column}}::timestamp AT TIME ZONE \'{{=it._offset}}\',',
' __cdb_bins_array',
' ),',
' __cdb_bins_number',
' )) - 1',
' END AS bin,',
' min(',
' date_part(',
' \'epoch\', ',
' date_trunc(',
' \'{{=it._aggregation}}\', {{=it._column}}::timestamp AT TIME ZONE \'{{=it._offset}}\'',
' ) AT TIME ZONE \'{{=it._offset}}\'',
' )',
' )::numeric AS timestamp,',
' date_part(\'epoch\', __cdb_start_date)::numeric AS timestamp_start,',
' min(date_part(\'epoch\', {{=it._column}}))::numeric AS min,',
' max(date_part(\'epoch\', {{=it._column}}))::numeric AS max,',
' avg(date_part(\'epoch\', {{=it._column}}))::numeric AS avg,',
' count(*) AS freq',
'FROM ({{=it._query}}) __cdb_histogram, __cdb_basics, __cdb_bins, __cdb_nulls',
'WHERE date_part(\'epoch\', {{=it._column}}) IS NOT NULL',
'GROUP BY bin, bins_number, bin_width, nulls_count, timestamp_start',
'ORDER BY bin'
].join('\n'));
var TYPE = 'histogram';
/**
Numeric histogram:
{
type: 'histogram',
options: {
column: 'name', // column data type: numeric
bins: 10 // OPTIONAL
}
}
Time series:
{
type: 'histogram',
options: {
column: 'date', // column data type: date
aggregation: 'day' // OPTIONAL (if undefined then it'll be built as numeric)
offset: -7200 // OPTIONAL (UTC offset in seconds)
}
}
*/
function Histogram(query, options, queries) {
if (!_.isString(options.column)) {
throw new Error('Histogram expects `column` in widget options');
this.histogramImplementation = this._getHistogramImplementation();
}
this.query = query;
this.queries = queries;
this.column = options.column;
this.bins = options.bins;
this.aggregation = options.aggregation;
this.offset = options.offset;
_getHistogramImplementation (override) {
let implementation = null;
this._columnType = null;
}
Histogram.prototype = new BaseWidget();
Histogram.prototype.constructor = Histogram;
module.exports = Histogram;
Histogram.prototype.sql = function(psql, override, callback) {
var self = this;
if (!callback) {
callback = override;
override = {};
}
if (this._columnType === null) {
this.getColumnType(psql, this.column, this.queries.no_filters, function (err, type) {
// assume numeric, will fail later
self._columnType = 'numeric';
if (!err && !!type) {
self._columnType = Object.keys(type).find(function (key) {
return type[key];
});
}
self.sql(psql, override, callback);
}, true); // use read-only transaction
return null;
}
this._buildQuery(psql, override, callback);
};
Histogram.prototype.isDateHistogram = function (override) {
return this._columnType === 'date' && (this.aggregation !== undefined || override.aggregation !== undefined);
};
Histogram.prototype._buildQuery = function (psql, override, callback) {
var filteredQuery, basicsQuery, binsQuery;
var _column = this.column;
var _query = this.query;
if (this.isDateHistogram(override)) {
return this._buildDateHistogramQuery(psql, override, callback);
}
if (this._columnType === 'date') {
_column = columnCastTpl({column: _column});
}
filteredQuery = filteredQueryTpl({
_isFloatColumn: this._columnType === 'float',
_query: _query,
_column: _column
});
if (this._shouldOverride(override)) {
debug('overriding with %j', override);
basicsQuery = overrideBasicsQueryTpl({
_query: _query,
_column: _column,
_start: getBinStart(override),
_end: getBinEnd(override)
});
binsQuery = [
overrideBinsQueryTpl({
_bins: override.bins
})
].join(',\n');
} else {
basicsQuery = basicsQueryTpl({
_query: _query,
_column: _column
});
if (this._shouldOverrideBins(override)) {
binsQuery = [
overrideBinsQueryTpl({
_bins: override.bins
})
].join(',\n');
} else {
binsQuery = [
iqrQueryTpl({
_query: _query,
_column: _column
}),
binsQueryTpl({
_query: _query,
_minBins: BIN_MIN_NUMBER,
_maxBins: BIN_MAX_NUMBER
})
].join(',\n');
}
}
var cteSql = [
filteredQuery,
basicsQuery,
binsQuery,
nullsQueryTpl({
_query: _query,
_column: _column
})
];
if (this._columnType === 'float') {
cteSql.push(
infinitiesQueryTpl({
_query: _query,
_column: _column
}),
nansQueryTpl({
_query: _query,
_column: _column
})
);
}
var histogramSql = [
"WITH",
cteSql.join(',\n'),
histogramQueryTpl({
_isFloatColumn: this._columnType === 'float',
_query: _query,
_column: _column
})
].join('\n');
debug(histogramSql);
return callback(null, histogramSql);
};
Histogram.prototype._shouldOverride = function (override) {
return override && _.has(override, 'start') && _.has(override, 'end') && _.has(override, 'bins');
};
Histogram.prototype._shouldOverrideBins = function (override) {
return override && _.has(override, 'bins');
};
var DATE_AGGREGATIONS = {
'auto': true,
'minute': true,
'hour': true,
'day': true,
'week': true,
'month': true,
'quarter': true,
'year': true
};
Histogram.prototype._buildDateHistogramQuery = function (psql, override, callback) {
var _column = this.column;
var _query = this.query;
var _aggregation = override && override.aggregation ? override.aggregation : this.aggregation;
var _offset = override && Number.isFinite(override.offset) ? override.offset : this.offset;
if (!DATE_AGGREGATIONS.hasOwnProperty(_aggregation)) {
return callback(new Error('Invalid aggregation value. Valid ones: ' +
Object.keys(DATE_AGGREGATIONS).join(', ')
));
}
if (_aggregation === 'auto') {
this.getAutomaticAggregation(psql, function (err, aggregation) {
if (err || aggregation === 'none') {
this.aggregation = 'day';
} else {
this.aggregation = aggregation;
}
override.aggregation = this.aggregation;
this._buildDateHistogramQuery(psql, override, callback);
}.bind(this));
return null;
}
var dateBasicsQuery;
if (override && _.has(override, 'start') && _.has(override, 'end')) {
dateBasicsQuery = dateOverrideBasicsQueryTpl({
_query: _query,
_column: _column,
_aggregation: _aggregation,
_start: getBinStart(override),
_end: getBinEnd(override),
_offset: parseOffset(_offset, _aggregation)
});
} else {
dateBasicsQuery = dateBasicsQueryTpl({
_query: _query,
_column: _column,
_aggregation: _aggregation,
_offset: parseOffset(_offset, _aggregation)
});
}
var dateBinsQuery = [
dateBinsQueryTpl({
_aggregation: _aggregation
})
].join(',\n');
var nullsQuery = nullsQueryTpl({
_query: _query,
_column: _column
});
var dateHistogramQuery = dateHistogramQueryTpl({
_query: _query,
_column: _column,
_aggregation: _aggregation,
_offset: parseOffset(_offset, _aggregation)
});
var histogramSql = [
"WITH",
[
dateBasicsQuery,
dateBinsQuery,
nullsQuery
].join(',\n'),
dateHistogramQuery
].join('\n');
debug(histogramSql);
return callback(null, histogramSql);
};
Histogram.prototype.getAutomaticAggregation = function (psql, callback) {
var dateIntervalQuery = dateIntervalQueryTpl({
query: this.query,
column: this.column
});
debug(dateIntervalQuery);
psql.query(dateIntervalQuery, function (err, result) {
if (err) {
return callback(err);
switch (this._getHistogramSubtype(override)) {
case DATE_HISTOGRAM:
debug('Delegating to DateHistogram with options: %j and overriding: %j', this.options, override);
implementation = new DateHistogram(this.query, this.options, this.queries);
break;
case NUMERIC_HISTOGRAM:
debug('Delegating to NumericHistogram with options: %j and overriding: %j', this.options, override);
implementation = new NumericHistogram(this.query, this.options, this.queries);
break;
default:
throw new Error('Unsupported Histogram type');
}
var aggegations = result.rows[0];
var aggregation = Object.keys(aggegations)
.map(function (key) {
return {
name: key,
value: aggegations[key]
};
})
.reduce(function (closer, current) {
if (current.value > MAX_INTERVAL_VALUE) {
return closer;
}
return implementation;
}
var closerDiff = MAX_INTERVAL_VALUE - closer.value;
var currentDiff = MAX_INTERVAL_VALUE - current.value;
if (Number.isFinite(current.value) && closerDiff > currentDiff) {
return current;
}
return closer;
}, { name: 'none', value: -1 });
callback(null, aggregation.name);
});
};
Histogram.prototype.format = function(result, override) {
override = override || {};
var buckets = [];
var binsCount = getBinsCount(override);
var width = getWidth(override);
var binsStart = getBinStart(override);
var nulls = 0;
var infinities = 0;
var nans = 0;
var avg;
var timestampStart;
var aggregation;
var offset;
if (result.rows.length) {
var firstRow = result.rows[0];
binsCount = firstRow.bins_number;
width = firstRow.bin_width || width;
avg = firstRow.avg_val;
nulls = firstRow.nulls_count;
timestampStart = firstRow.timestamp_start;
infinities = firstRow.infinities_count;
nans = firstRow.nans_count;
binsStart = populateBinStart(override, firstRow);
if (Number.isFinite(timestampStart)) {
aggregation = getAggregation(override, this.aggregation);
offset = getOffset(override, this.offset);
_getHistogramSubtype (override) {
if(this._isDateHistogram(override)) {
return DATE_HISTOGRAM;
}
buckets = result.rows.map(function(row) {
return _.omit(
row,
'bins_number',
'bin_width',
'nulls_count',
'infinities_count',
'nans_count',
'avg_val',
'timestamp_start'
);
});
return NUMERIC_HISTOGRAM;
}
return {
aggregation: aggregation,
offset: offset,
timestamp_start: timestampStart,
bin_width: width,
bins_count: binsCount,
bins_start: binsStart,
nulls: nulls,
infinities: infinities,
nans: nans,
avg: avg,
bins: buckets
};
};
function getAggregation(override, aggregation) {
return override && override.aggregation ? override.aggregation : aggregation;
}
function getOffset(override, offset) {
if (override && override.offset) {
return override.offset;
}
if (offset) {
return offset;
}
return 0;
}
function getBinStart(override) {
if (override.hasOwnProperty('start') && override.hasOwnProperty('end')) {
return Math.min(override.start, override.end);
}
return override.start || 0;
}
function getBinEnd(override) {
if (override.hasOwnProperty('start') && override.hasOwnProperty('end')) {
return Math.max(override.start, override.end);
}
return override.end || 0;
}
function getBinsCount(override) {
return override.bins || 0;
}
function getWidth(override) {
var width = 0;
var binsCount = override.bins;
if (binsCount && Number.isFinite(override.start) && Number.isFinite(override.end)) {
width = (override.end - override.start) / binsCount;
}
return width;
}
function parseOffset(offset, aggregation) {
if (!offset) {
return '0';
}
if (aggregation === 'hour' || aggregation === 'minute') {
return '0';
}
var offsetInHours = Math.ceil(offset / 3600);
return '' + offsetInHours;
}
function populateBinStart(override, firstRow) {
var binStart;
if (firstRow.hasOwnProperty('timestamp')) {
binStart = firstRow.timestamp;
} else if (override.hasOwnProperty('start')) {
binStart = getBinStart(override);
} else {
binStart = firstRow.min;
}
return binStart;
}
Histogram.prototype.getType = function() {
return TYPE;
};
Histogram.prototype.toString = function() {
return JSON.stringify({
_type: TYPE,
_column: this.column,
_query: this.query
});
_isDateHistogram (override = {}) {
return (this.options.hasOwnProperty('aggregation') || override.hasOwnProperty('aggregation'));
}
getResult (psql, override, callback) {
this.histogramImplementation = this._getHistogramImplementation(override);
this.histogramImplementation.getResult(psql, override, callback);
}
// In order to keep previous behaviour with overviews,
// we have to expose the following methods to bypass
// the concrete overview implementation
sql (psql, override, callback) {
this.histogramImplementation.sql(psql, override, callback);
}
format (result, override) {
return this.histogramImplementation.format(result, override);
}
getType () {
return this.histogramImplementation.getType();
}
toString () {
return this.histogramImplementation.toString();
}
};

View File

@ -0,0 +1,85 @@
const BaseDataview = require('../base');
const TYPE = 'histogram';
module.exports = class BaseHistogram extends BaseDataview {
constructor (query, options, queries) {
super();
if (typeof options.column !== 'string') {
throw new Error('Histogram expects `column` in widget options');
}
this.query = query;
this.queries = queries;
this.column = options.column;
this.bins = options.bins;
this._columnType = null;
}
sql (psql, override, callback) {
if (!callback) {
callback = override;
override = {};
}
if (this._columnType === null) {
this.getColumnType(psql, this.column, this.queries.no_filters, (err, type) => {
// assume numeric, will fail later
this._columnType = 'numeric';
if (!err && !!type) {
this._columnType = Object.keys(type).find(function (key) {
return type[key];
});
}
this.sql(psql, override, callback);
}, true); // use read-only transaction
return null;
}
return this._buildQuery(psql, override, callback);
}
format (result, override) {
const histogram = this._getSummary(result, override);
histogram.bins = this._getBuckets(result);
return histogram;
}
getType () {
return TYPE;
}
toString () {
return JSON.stringify({
_type: TYPE,
_column: this.column,
_query: this.query
});
}
_hasOverridenRange (override) {
return override && override.hasOwnProperty('start') && override.hasOwnProperty('end');
}
_getBinStart (override = {}) {
if (this._hasOverridenRange(override)) {
return Math.min(override.start, override.end);
}
return override.start || 0;
}
_getBinEnd (override = {}) {
if (this._hasOverridenRange(override)) {
return Math.max(override.start, override.end);
}
return override.end || 0;
}
_getBinsCount (override = {}) {
return override.bins || 0;
}
};

View File

@ -0,0 +1,302 @@
const BaseHistogram = require('./base-histogram');
const debug = require('debug')('windshaft:dataview:date-histogram');
const dateIntervalQueryTpl = ctx => `
WITH
__cdb_dates AS (
SELECT
MAX(${ctx.column}::timestamp) AS __cdb_end,
MIN(${ctx.column}::timestamp) AS __cdb_start
FROM (${ctx.query}) __cdb_source
),
__cdb_interval_in_days AS (
SELECT
DATE_PART('day', __cdb_end - __cdb_start) AS __cdb_days
FROM __cdb_dates
),
__cdb_interval_in_hours AS (
SELECT
__cdb_days * 24 + DATE_PART('hour', __cdb_end - __cdb_start) AS __cdb_hours
FROM __cdb_interval_in_days, __cdb_dates
),
__cdb_interval_in_minutes AS (
SELECT
__cdb_hours * 60 + DATE_PART('minute', __cdb_end - __cdb_start) AS __cdb_minutes
FROM __cdb_interval_in_hours, __cdb_dates
),
__cdb_interval_in_seconds AS (
SELECT
__cdb_minutes * 60 + DATE_PART('second', __cdb_end - __cdb_start) AS __cdb_seconds
FROM __cdb_interval_in_minutes, __cdb_dates
)
SELECT
ROUND(__cdb_days / 365) AS year,
ROUND(__cdb_days / 90) AS quarter,
ROUND(__cdb_days / 30) AS month,
ROUND(__cdb_days / 7) AS week,
__cdb_days AS day,
__cdb_hours AS hour,
__cdb_minutes AS minute,
__cdb_seconds AS second
FROM __cdb_interval_in_days, __cdb_interval_in_hours, __cdb_interval_in_minutes, __cdb_interval_in_seconds
`;
const nullsQueryTpl = ctx => `
__cdb_nulls AS (
SELECT
count(*) AS __cdb_nulls_count
FROM (${ctx.query}) __cdb_histogram_nulls
WHERE ${ctx.column} IS NULL
)
`;
const dateBasicsQueryTpl = ctx => `
__cdb_basics AS (
SELECT
max(date_part('epoch', ${ctx.column})) AS __cdb_max_val,
min(date_part('epoch', ${ctx.column})) AS __cdb_min_val,
avg(date_part('epoch', ${ctx.column})) AS __cdb_avg_val,
min(
date_trunc(
'${ctx.aggregation}', ${ctx.column}::timestamp AT TIME ZONE '${ctx.offset}'
)
) AS __cdb_start_date,
max(${ctx.column}::timestamp AT TIME ZONE '${ctx.offset}') AS __cdb_end_date,
count(1) AS __cdb_total_rows
FROM (${ctx.query}) __cdb_basics_query
)
`;
const dateOverrideBasicsQueryTpl = ctx => `
__cdb_basics AS (
SELECT
max(${ctx.end})::float AS __cdb_max_val,
min(${ctx.start})::float AS __cdb_min_val,
avg(date_part('epoch', ${ctx.column})) AS __cdb_avg_val,
min(
date_trunc(
'${ctx.aggregation}',
TO_TIMESTAMP(${ctx.start})::timestamp AT TIME ZONE '${ctx.offset}'
)
) AS __cdb_start_date,
max(
TO_TIMESTAMP(${ctx.end})::timestamp AT TIME ZONE '${ctx.offset}'
) AS __cdb_end_date,
count(1) AS __cdb_total_rows
FROM (${ctx.query}) __cdb_basics_query
)
`;
const dateBinsQueryTpl = ctx => `
__cdb_bins AS (
SELECT
__cdb_bins_array,
ARRAY_LENGTH(__cdb_bins_array, 1) AS __cdb_bins_number
FROM (
SELECT
ARRAY(
SELECT GENERATE_SERIES(
__cdb_start_date::timestamptz,
__cdb_end_date::timestamptz,
${ctx.aggregation === 'quarter' ? `'3 month'::interval` : `'1 ${ctx.aggregation}'::interval`}
)
) AS __cdb_bins_array
FROM __cdb_basics
) __cdb_bins_array_query
)
`;
const dateHistogramQueryTpl = ctx => `
SELECT
(__cdb_max_val - __cdb_min_val) / cast(__cdb_bins_number as float) AS bin_width,
__cdb_bins_number AS bins_number,
__cdb_nulls_count AS nulls_count,
CASE WHEN __cdb_min_val = __cdb_max_val
THEN 0
ELSE GREATEST(
1,
LEAST(
WIDTH_BUCKET(
${ctx.column}::timestamp AT TIME ZONE '${ctx.offset}',
__cdb_bins_array
),
__cdb_bins_number
)
) - 1
END AS bin,
min(
date_part(
'epoch',
date_trunc(
'${ctx.aggregation}', ${ctx.column}::timestamp AT TIME ZONE '${ctx.offset}'
) AT TIME ZONE '${ctx.offset}'
)
)::numeric AS timestamp,
date_part('epoch', __cdb_start_date)::numeric AS timestamp_start,
min(date_part('epoch', ${ctx.column}))::numeric AS min,
max(date_part('epoch', ${ctx.column}))::numeric AS max,
avg(date_part('epoch', ${ctx.column}))::numeric AS avg,
count(*) AS freq
FROM (${ctx.query}) __cdb_histogram, __cdb_basics, __cdb_bins, __cdb_nulls
WHERE date_part('epoch', ${ctx.column}) IS NOT NULL
GROUP BY bin, bins_number, bin_width, nulls_count, timestamp_start
ORDER BY bin
`;
const MAX_INTERVAL_VALUE = 366;
const DATE_AGGREGATIONS = {
'auto': true,
'minute': true,
'hour': true,
'day': true,
'week': true,
'month': true,
'quarter': true,
'year': true
};
/**
date_histogram: {
type: 'histogram',
options: {
column: 'date', // column data type: date
aggregation: 'day' // MANDATORY
offset: -7200 // OPTIONAL (UTC offset in seconds)
}
}
*/
module.exports = class DateHistogram extends BaseHistogram {
constructor (query, options, queries) {
super(query, options, queries);
this.aggregation = options.aggregation;
this.offset = options.offset;
}
_buildQueryTpl (ctx) {
return `
WITH
${this._hasOverridenRange(ctx.override) ? dateOverrideBasicsQueryTpl(ctx) : dateBasicsQueryTpl(ctx)},
${dateBinsQueryTpl(ctx)},
${nullsQueryTpl(ctx)}
${dateHistogramQueryTpl(ctx)}
`;
}
_buildQuery (psql, override, callback) {
if (!this._isValidAggregation(override)) {
return callback(new Error('Invalid aggregation value. Valid ones: ' +
Object.keys(DATE_AGGREGATIONS).join(', ')
));
}
if (this._getAggregation(override) === 'auto') {
this._getAutomaticAggregation(psql, function (err, aggregation) {
if (err || aggregation === 'none') {
this.aggregation = 'day';
} else {
this.aggregation = aggregation;
}
override.aggregation = this.aggregation;
this._buildQuery(psql, override, callback);
}.bind(this));
return null;
}
const histogramSql = this._buildQueryTpl({
override: override,
query: this.query,
column: this.column,
aggregation: this._getAggregation(override),
start: this._getBinStart(override),
end: this._getBinEnd(override),
offset: this._parseOffset(override)
});
debug(histogramSql);
return callback(null, histogramSql);
}
_isValidAggregation (override) {
return DATE_AGGREGATIONS.hasOwnProperty(this._getAggregation(override));
}
_getAutomaticAggregation (psql, callback) {
const dateIntervalQuery = dateIntervalQueryTpl({
query: this.query,
column: this.column
});
psql.query(dateIntervalQuery, function (err, result) {
if (err) {
return callback(err);
}
const aggegations = result.rows[0];
const aggregation = Object.keys(aggegations)
.map(key => ({ name: key, value: aggegations[key] }))
.reduce((closer, current) => {
if (current.value > MAX_INTERVAL_VALUE) {
return closer;
}
const closerDiff = MAX_INTERVAL_VALUE - closer.value;
const currentDiff = MAX_INTERVAL_VALUE - current.value;
if (Number.isFinite(current.value) && closerDiff > currentDiff) {
return current;
}
return closer;
}, { name: 'none', value: -1 });
callback(null, aggregation.name);
});
}
_getSummary (result, override) {
const firstRow = result.rows[0] || {};
return {
aggregation: this._getAggregation(override),
offset: this._getOffset(override),
timestamp_start: firstRow.timestamp_start,
bin_width: firstRow.bin_width,
bins_count: firstRow.bins_number,
bins_start: firstRow.timestamp,
nulls: firstRow.nulls_count,
infinities: firstRow.infinities_count,
nans: firstRow.nans_count,
avg: firstRow.avg_val
};
}
_getBuckets (result) {
return result.rows.map(({ bin, min, max, avg, freq, timestamp }) => ({ bin, min, max, avg, freq, timestamp }));
}
_getAggregation (override = {}) {
return override.aggregation ? override.aggregation : this.aggregation;
}
_getOffset (override = {}) {
return Number.isFinite(override.offset) ? override.offset : (this.offset || 0);
}
_parseOffset (override) {
if (this._shouldIgnoreOffset(override)) {
return '0';
}
const offsetInHours = Math.ceil(this._getOffset(override) / 3600);
return '' + offsetInHours;
}
_shouldIgnoreOffset (override) {
return (this._getAggregation(override) === 'hour' || this._getAggregation(override) === 'minute');
}
};

View File

@ -0,0 +1,234 @@
const BaseHistogram = require('./base-histogram');
const debug = require('debug')('windshaft:dataview:numeric-histogram');
const columnCastTpl = ctx => `date_part('epoch', ${ctx.column})`;
const filterOutSpecialNumericValues = ctx => `
${ctx.column} != 'infinity'::float
AND
${ctx.column} != '-infinity'::float
AND
${ctx.column} != 'NaN'::float
`;
const filteredQueryTpl = ctx => `
__cdb_filtered_source AS (
SELECT *
FROM (${ctx.query}) __cdb_filtered_source_query
WHERE ${ctx.column} IS NOT NULL
${ctx.isFloatColumn ? `AND ${filterOutSpecialNumericValues(ctx)}` : ''}
)
`;
const basicsQueryTpl = ctx => `
__cdb_basics AS (
SELECT
max(${ctx.column}) AS __cdb_max_val, min(${ctx.column}) AS __cdb_min_val,
avg(${ctx.column}) AS __cdb_avg_val, count(1) AS __cdb_total_rows
FROM __cdb_filtered_source
)
`;
const overrideBasicsQueryTpl = ctx => `
__cdb_basics AS (
SELECT
max(${ctx.end}) AS __cdb_max_val, min(${ctx.start}) AS __cdb_min_val,
avg(${ctx.column}) AS __cdb_avg_val, count(1) AS __cdb_total_rows
FROM __cdb_filtered_source
)
`;
const iqrQueryTpl = ctx => `
__cdb_iqrange AS (
SELECT max(quartile_max) - min(quartile_max) AS __cdb_iqr
FROM (
SELECT quartile, max(_cdb_iqr_column) AS quartile_max from (
SELECT ${ctx.column} AS _cdb_iqr_column, ntile(4) over (order by ${ctx.column}
) AS quartile
FROM __cdb_filtered_source) _cdb_quartiles
WHERE quartile = 1 or quartile = 3
GROUP BY quartile
) __cdb_iqr
)
`;
const binsQueryTpl = ctx => `
__cdb_bins AS (
SELECT
CASE WHEN __cdb_total_rows = 0 OR __cdb_iqr = 0
THEN 1
ELSE GREATEST(
LEAST(${ctx.minBins}, CAST(__cdb_total_rows AS INT)),
LEAST(
CAST(((__cdb_max_val - __cdb_min_val) / (2 * __cdb_iqr * power(__cdb_total_rows, 1/3))) AS INT),
${ctx.maxBins}
)
)
END AS __cdb_bins_number
FROM __cdb_basics, __cdb_iqrange, __cdb_filtered_source
LIMIT 1
)
`;
const overrideBinsQueryTpl = ctx => `
__cdb_bins AS (
SELECT ${ctx.override.bins} AS __cdb_bins_number
)
`;
const nullsQueryTpl = ctx => `
__cdb_nulls AS (
SELECT
count(*) AS __cdb_nulls_count
FROM (${ctx.query}) __cdb_histogram_nulls
WHERE ${ctx.column} IS NULL
)
`;
const infinitiesQueryTpl = ctx => `
__cdb_infinities AS (
SELECT
count(*) AS __cdb_infinities_count
FROM (${ctx.query}) __cdb_infinities_query
WHERE
${ctx.column} = 'infinity'::float
OR
${ctx.column} = '-infinity'::float
)
`;
const nansQueryTpl = ctx => `
__cdb_nans AS (
SELECT
count(*) AS __cdb_nans_count
FROM (${ctx.query}) __cdb_nans_query
WHERE ${ctx.column} = 'NaN'::float
)
`;
const specialNumericValuesColumnDefinitionTpl = () => `
__cdb_infinities_count AS infinities_count,
__cdb_nans_count AS nans_count
`;
const specialNumericValuesCTETpl = () => `
__cdb_infinities, __cdb_nans
`;
const specialNumericValuesColumnTpl = () => `
infinities_count, nans_count
`;
const histogramQueryTpl = ctx => `
SELECT
(__cdb_max_val - __cdb_min_val) / cast(__cdb_bins_number as float) AS bin_width,
__cdb_bins_number AS bins_number,
__cdb_nulls_count AS nulls_count,
${ctx.isFloatColumn ? `${specialNumericValuesColumnDefinitionTpl()},` : ''}
__cdb_avg_val AS avg_val,
CASE WHEN __cdb_min_val = __cdb_max_val
THEN 0
ELSE GREATEST(
1,
LEAST(
WIDTH_BUCKET(${ctx.column}, __cdb_min_val, __cdb_max_val, __cdb_bins_number),
__cdb_bins_number
)
) - 1
END AS bin,
min(${ctx.column})::numeric AS min,
max(${ctx.column})::numeric AS max,
avg(${ctx.column})::numeric AS avg,
count(*) AS freq
FROM __cdb_filtered_source, __cdb_basics, __cdb_nulls, __cdb_bins
${ctx.isFloatColumn ? `, ${specialNumericValuesCTETpl()}` : ''}
GROUP BY bin, bins_number, bin_width, nulls_count, avg_val
${ctx.isFloatColumn ? `, ${specialNumericValuesColumnTpl()}` : ''}
ORDER BY bin
`;
const BIN_MIN_NUMBER = 6;
const BIN_MAX_NUMBER = 48;
/**
Numeric histogram:
{
type: 'histogram',
options: {
column: 'name', // column data type: numeric
bins: 10 // OPTIONAL
}
}
*/
module.exports = class NumericHistogram extends BaseHistogram {
constructor (query, options, queries) {
super(query, options, queries);
}
_buildQuery (psql, override, callback) {
const histogramSql = this._buildQueryTpl({
override: override,
column: this._columnType === 'date' ? columnCastTpl({ column: this.column }) : this.column,
isFloatColumn: this._columnType === 'float',
query: this.query,
start: this._getBinStart(override),
end: this._getBinEnd(override),
minBins: BIN_MIN_NUMBER,
maxBins: BIN_MAX_NUMBER,
});
debug(histogramSql);
return callback(null, histogramSql);
}
_buildQueryTpl (ctx) {
return `
WITH
${filteredQueryTpl(ctx)},
${this._hasOverridenRange(ctx.override) ? overrideBasicsQueryTpl(ctx) : basicsQueryTpl(ctx)},
${this._hasOverridenBins(ctx.override) ?
overrideBinsQueryTpl(ctx) :
`${iqrQueryTpl(ctx)}, ${binsQueryTpl(ctx)}`
},
${nullsQueryTpl(ctx)}
${ctx.isFloatColumn ? `,${infinitiesQueryTpl(ctx)}, ${nansQueryTpl(ctx)}` : ''}
${histogramQueryTpl(ctx)}
`;
}
_hasOverridenBins (override) {
return override && override.hasOwnProperty('bins');
}
_getSummary (result, override) {
const firstRow = result.rows[0] || {};
return {
bin_width: firstRow.bin_width,
bins_count: firstRow.bins_number,
bins_start: this._populateBinStart(firstRow, override),
nulls: firstRow.nulls_count,
infinities: firstRow.infinities_count,
nans: firstRow.nans_count,
avg: firstRow.avg_val,
};
}
_getBuckets (result) {
return result.rows.map(({ bin, min, max, avg, freq }) => ({ bin, min, max, avg, freq }));
}
_populateBinStart (firstRow, override = {}) {
let binStart;
if (override.hasOwnProperty('start')) {
binStart = this._getBinStart(override);
} else {
binStart = firstRow.min;
}
return binStart;
}
};

View File

@ -1,11 +1,9 @@
var dot = require('dot');
dot.templateSettings.strip = false;
const BaseDataview = require('./base');
const debug = require('debug')('windshaft:dataview:list');
var BaseWidget = require('./base');
const TYPE = 'list';
var TYPE = 'list';
var listSqlTpl = dot.template('select {{=it._columns}} from ({{=it._query}}) as _cdb_list');
const listSqlTpl = ctx => `select ${ctx.columns} from (${ctx.query}) as _cdb_list`;
/**
{
@ -15,52 +13,52 @@ var listSqlTpl = dot.template('select {{=it._columns}} from ({{=it._query}}) as
}
}
*/
module.exports = class List extends BaseDataview {
constructor (query, options = {}) {
super();
function List(query, options) {
options = options || {};
this._checkOptions(options);
if (!Array.isArray(options.columns)) {
throw new Error('List expects `columns` array in widget options');
this.query = query;
this.columns = options.columns;
}
BaseWidget.apply(this);
this.query = query;
this.columns = options.columns;
}
List.prototype = new BaseWidget();
List.prototype.constructor = List;
module.exports = List;
List.prototype.sql = function(psql, override, callback) {
if (!callback) {
callback = override;
_checkOptions (options) {
if (!Array.isArray(options.columns)) {
throw new Error('List expects `columns` array in dataview options');
}
}
var listSql = listSqlTpl({
_query: this.query,
_columns: this.columns.join(', ')
});
sql (psql, override, callback) {
if (!callback) {
callback = override;
}
return callback(null, listSql);
};
const listSql = listSqlTpl({
query: this.query,
columns: this.columns.join(', ')
});
List.prototype.format = function(result) {
return {
rows: result.rows
};
};
debug(listSql);
List.prototype.getType = function() {
return TYPE;
};
return callback(null, listSql);
}
List.prototype.toString = function() {
return JSON.stringify({
_type: TYPE,
_query: this.query,
_columns: this.columns.join(', ')
});
format (result) {
return {
rows: result.rows
};
}
getType () {
return TYPE;
}
toString () {
return JSON.stringify({
_type: TYPE,
_query: this.query,
_columns: this.columns.join(', ')
});
}
};

View File

@ -1,7 +1,7 @@
{
"private": true,
"name": "windshaft-cartodb",
"version": "3.13.0",
"version": "3.13.1",
"description": "A map tile server for CartoDB",
"keywords": [
"cartodb"
@ -23,7 +23,7 @@
],
"dependencies": {
"body-parser": "~1.14.0",
"camshaft": "0.59.0",
"camshaft": "0.59.1",
"cartodb-psql": "0.10.1",
"cartodb-query-tables": "0.3.0",
"cartodb-redis": "0.14.0",
@ -63,6 +63,6 @@
},
"engines": {
"node": ">=6.9",
"yarn": "^0.21.3"
"yarn": ">=0.27.5 <1.0.0"
}
}

View File

@ -2,7 +2,7 @@
# yarn lockfile v1
"abaculus@github:cartodb/abaculus#2.0.3-cdb1":
abaculus@cartodb/abaculus#2.0.3-cdb1:
version "2.0.3-cdb1"
resolved "https://codeload.github.com/cartodb/abaculus/tar.gz/f5f34e1c80cdd8d49edd1d6fe3b2220ab2e23aaf"
dependencies:
@ -219,9 +219,9 @@ camelcase@^3.0.0:
version "3.0.0"
resolved "https://registry.yarnpkg.com/camelcase/-/camelcase-3.0.0.tgz#32fc4b9fcdaf845fcdf7e73bb97cac2261f0ab0a"
camshaft@0.59.0:
version "0.59.0"
resolved "https://registry.yarnpkg.com/camshaft/-/camshaft-0.59.0.tgz#699c0834116ec87e067d42e11eea9540bddc6383"
camshaft@0.59.1:
version "0.59.1"
resolved "https://registry.yarnpkg.com/camshaft/-/camshaft-0.59.1.tgz#a2e87fa4a0236655160cd1a6a9e966cbbf86dd43"
dependencies:
async "^1.5.2"
bunyan "1.8.1"
@ -230,7 +230,7 @@ camshaft@0.59.0:
dot "^1.0.3"
request "^2.69.0"
"canvas@github:cartodb/node-canvas#1.6.2-cdb2":
canvas@cartodb/node-canvas#1.6.2-cdb2:
version "1.6.2-cdb2"
resolved "https://codeload.github.com/cartodb/node-canvas/tar.gz/8acf04557005c633f9e68524488a2657c04f3766"
dependencies:
@ -248,15 +248,15 @@ carto@0.16.3:
semver "^5.1.0"
yargs "^4.2.0"
"carto@github:cartodb/carto#0.15.1-cdb1":
carto@CartoDB/carto#0.15.1-cdb1:
version "0.15.1-cdb1"
resolved "https://codeload.github.com/cartodb/carto/tar.gz/8050ec843f1f32a6469e5d1cf49602773015d398"
resolved "https://codeload.github.com/CartoDB/carto/tar.gz/8050ec843f1f32a6469e5d1cf49602773015d398"
dependencies:
mapnik-reference "~6.0.2"
optimist "~0.6.0"
underscore "~1.6.0"
"carto@github:cartodb/carto#0.15.1-cdb3":
carto@cartodb/carto#0.15.1-cdb3:
version "0.15.1-cdb3"
resolved "https://codeload.github.com/cartodb/carto/tar.gz/945f5efb74fd1af1f5e1f69f409f9567f94fb5a7"
dependencies:
@ -1641,7 +1641,7 @@ pg-types@1.*:
postgres-date "~1.0.0"
postgres-interval "^1.1.0"
"pg@github:cartodb/node-postgres#6.1.6-cdb1":
pg@cartodb/node-postgres#6.1.6-cdb1:
version "6.1.6"
resolved "https://codeload.github.com/cartodb/node-postgres/tar.gz/3eef52dd1e655f658a4ee8ac5697688b3ecfed44"
dependencies:
@ -2274,7 +2274,7 @@ through@2:
version "2.3.8"
resolved "https://registry.yarnpkg.com/through/-/through-2.3.8.tgz#0dd4c9ffaabc357960b1b724115d7e0e86a2e1f5"
"tilelive-bridge@github:cartodb/tilelive-bridge#2.3.1-cdb4":
tilelive-bridge@cartodb/tilelive-bridge#2.3.1-cdb4:
version "2.3.1-cdb4"
resolved "https://codeload.github.com/cartodb/tilelive-bridge/tar.gz/faa2b638da2d119b78281575d40255cb523f6ca6"
dependencies:
@ -2282,7 +2282,7 @@ through@2:
mapnik-pool "~0.1.3"
sphericalmercator "1.0.x"
"tilelive-mapnik@github:cartodb/tilelive-mapnik#0.6.18-cdb3":
tilelive-mapnik@cartodb/tilelive-mapnik#0.6.18-cdb3:
version "0.6.18-cdb3"
resolved "https://codeload.github.com/cartodb/tilelive-mapnik/tar.gz/23bd1c31dd57d0b76c86b9f1eaf62462b3c17d01"
dependencies: