Merge pull request #246 from CartoDB/245-categories-mode
Use the mode to aggregate category columns in overviews
This commit is contained in:
commit
8302f89413
@ -88,6 +88,26 @@ AS $$
|
||||
END;
|
||||
$$ LANGUAGE PLPGSQL IMMUTABLE;
|
||||
|
||||
CREATE OR REPLACE FUNCTION _CDB_OverviewBaseTable(overview_table REGCLASS)
|
||||
RETURNS REGCLASS
|
||||
AS $$
|
||||
DECLARE
|
||||
table_name TEXT;
|
||||
schema_name TEXT;
|
||||
base_name TEXT;
|
||||
base_table REGCLASS;
|
||||
BEGIN
|
||||
SELECT * FROM _cdb_split_table_name(overview_table) INTO schema_name, table_name;
|
||||
base_name := _CDB_OverviewBaseTableName(table_name);
|
||||
IF base_name != table_name THEN
|
||||
base_table := Format('%I.%I', schema_name, base_name)::regclass;
|
||||
ELSE
|
||||
base_table := overview_table;
|
||||
END IF;
|
||||
RETURN base_table;
|
||||
END;
|
||||
$$ LANGUAGE PLPGSQL IMMUTABLE;
|
||||
|
||||
-- Schema and relation names of a table given its reloid
|
||||
-- Scope: private.
|
||||
-- Parameters
|
||||
@ -531,6 +551,54 @@ AS $$
|
||||
);
|
||||
$$ LANGUAGE SQL STABLE;
|
||||
|
||||
CREATE OR REPLACE FUNCTION _cdb_categorical_column(reloid REGCLASS, col_name TEXT)
|
||||
RETURNS BOOLEAN
|
||||
AS $$
|
||||
DECLARE
|
||||
schema_name TEXT;
|
||||
table_name TEXT;
|
||||
available BOOLEAN;
|
||||
categorical BOOLEAN;
|
||||
BEGIN
|
||||
SELECT * FROM _cdb_split_table_name(reloid) INTO schema_name, table_name;
|
||||
SELECT n_distinct IS NOT NULL
|
||||
FROM pg_stats
|
||||
WHERE pg_stats.schemaname = schema_name
|
||||
AND pg_stats.tablename = table_name
|
||||
AND pg_stats.attname = col_name
|
||||
INTO available;
|
||||
IF available IS NULL OR NOT available THEN
|
||||
EXECUTE Format('ANALYZE %s;', reloid);
|
||||
END IF;
|
||||
SELECT n_distinct > 0 AND n_distinct <= 20
|
||||
FROM pg_stats
|
||||
WHERE pg_stats.schemaname = schema_name
|
||||
AND pg_stats.tablename = table_name
|
||||
AND pg_stats.attname = col_name
|
||||
INTO categorical;
|
||||
RETURN categorical;
|
||||
END;
|
||||
$$ LANGUAGE PLPGSQL VOLATILE;
|
||||
|
||||
CREATE OR REPLACE FUNCTION _cdb_mode_of_array(anyarray)
|
||||
RETURNS anyelement AS
|
||||
$$
|
||||
SELECT a
|
||||
FROM unnest($1) a
|
||||
GROUP BY 1
|
||||
ORDER BY COUNT(1) DESC, 1
|
||||
LIMIT 1;
|
||||
$$
|
||||
LANGUAGE SQL IMMUTABLE;
|
||||
|
||||
-- Tell Postgres how to use our aggregate
|
||||
CREATE AGGREGATE _cdb_mode(anyelement) (
|
||||
SFUNC=array_append,
|
||||
STYPE=anyarray,
|
||||
FINALFUNC=_cdb_mode_of_array,
|
||||
INITCOND='{}'
|
||||
);
|
||||
|
||||
-- SQL Aggregation expression for a datase attribute
|
||||
-- Scope: private.
|
||||
-- Parameters
|
||||
@ -548,6 +616,7 @@ DECLARE
|
||||
has_counter_column BOOLEAN;
|
||||
feature_count TEXT;
|
||||
total_feature_count TEXT;
|
||||
base_table REGCLASS;
|
||||
BEGIN
|
||||
IF table_alias <> '' THEN
|
||||
qualified_column := Format('%I.%I', table_alias, column_name);
|
||||
@ -568,20 +637,30 @@ BEGIN
|
||||
total_feature_count := 'count(*)';
|
||||
END IF;
|
||||
|
||||
base_table := _CDB_OverviewBaseTable(reloid);
|
||||
|
||||
CASE column_type
|
||||
WHEN 'double precision', 'real', 'integer', 'bigint', 'numeric' THEN
|
||||
IF column_name = '_feature_count' THEN
|
||||
RETURN 'SUM(_feature_count)';
|
||||
ELSE
|
||||
RETURN Format('SUM(%s*%s)/%s::' || column_type, qualified_column, feature_count, total_feature_count);
|
||||
IF column_type = 'integer' AND _cdb_categorical_column(base_table, column_name) THEN
|
||||
RETURN Format('CDB_Math_Mode(%s)::', qualified_column) || column_type;
|
||||
ELSE
|
||||
RETURN Format('SUM(%s*%s)/%s::' || column_type, qualified_column, feature_count, total_feature_count);
|
||||
END IF;
|
||||
END IF;
|
||||
WHEN 'text', 'character varying', 'character' THEN
|
||||
IF _cdb_unlimited_text_column(reloid, column_name) THEN
|
||||
-- TODO: this should not be applied to columns containing largish text;
|
||||
-- it is intended only to short names/identifiers
|
||||
RETURN 'CASE WHEN count(distinct ' || qualified_column || ') = 1 THEN MIN(' || qualified_column || ') WHEN ' || total_feature_count || ' < 5 THEN string_agg(distinct ' || qualified_column || ','' / '') ELSE ''*'' END::' || column_type;
|
||||
IF _cdb_categorical_column(base_table, column_name) THEN
|
||||
RETURN Format('_cdb_mode(%s)::', qualified_column) || column_type;
|
||||
ELSE
|
||||
RETURN 'CASE count(*) WHEN 1 THEN MIN(' || qualified_column || ') ELSE NULL END::' || column_type;
|
||||
IF _cdb_unlimited_text_column(base_table, column_name) THEN
|
||||
-- TODO: this should not be applied to columns containing largish text;
|
||||
-- it is intended only to short names/identifiers
|
||||
RETURN 'CASE WHEN count(distinct ' || qualified_column || ') = 1 THEN MIN(' || qualified_column || ') WHEN ' || total_feature_count || ' < 5 THEN string_agg(distinct ' || qualified_column || ','' / '') ELSE ''*'' END::' || column_type;
|
||||
ELSE
|
||||
RETURN 'CASE count(*) WHEN 1 THEN MIN(' || qualified_column || ') ELSE NULL END::' || column_type;
|
||||
END IF;
|
||||
END IF;
|
||||
WHEN 'boolean' THEN
|
||||
RETURN 'CASE count(*) WHEN 1 THEN BOOL_AND(' || qualified_column || ') ELSE NULL END::' || column_type;
|
||||
|
Loading…
Reference in New Issue
Block a user