cartodb-postgresql/scripts-available/CDB_Overviews.sql
Mario de Frutos 4908bacc4b If existing overviews we regenerate them
Instead of make a DROP/CREATE always, in case we have existing
overviews we have to make DELETE/INSERT because DDL operations require
AccessExclusiveLock which is not compatible with AccessShareLock that
is used by the queries used to gather data for the tiler.

This incompatibility above mentioned leads to have deadlocks
2017-06-02 15:23:47 +02:00

1069 lines
39 KiB
PL/PgSQL

-- Remove a dataset's existing overview tables.
-- Scope: public
-- Parameters:
-- reloid: oid of the table.
CREATE OR REPLACE FUNCTION CDB_DropOverviews(reloid REGCLASS)
RETURNS void
AS $$
DECLARE
row record;
schema_name TEXT;
table_name TEXT;
BEGIN
SELECT * FROM _cdb_split_table_name(reloid) INTO schema_name, table_name;
FOR row IN
SELECT * FROM CDB_Overviews(reloid)
LOOP
EXECUTE Format('DROP TABLE %s;', row.overview_table);
RAISE NOTICE 'Dropped overview for level %: %', row.z, row.overview_table;
END LOOP;
END;
$$ LANGUAGE PLPGSQL VOLATILE;
-- Return existing overviews (if any) for a given dataset table
-- Scope: public
-- Parameters
-- reloid: oid of the input table.
-- Return relation of overviews for the table with
-- the base table oid,
-- z level of the overview and overview table oid, ordered by z.
CREATE OR REPLACE FUNCTION CDB_Overviews(reloid REGCLASS)
RETURNS TABLE(base_table REGCLASS, z integer, overview_table REGCLASS)
AS $$
DECLARE
schema_name TEXT;
base_table_name TEXT;
BEGIN
SELECT * FROM _cdb_split_table_name(reloid) INTO schema_name, base_table_name;
RETURN QUERY SELECT
reloid AS base_table,
_CDB_OverviewTableZ(table_name) AS z,
table_regclass AS overview_table
FROM _CDB_UserTablesInSchema(schema_name)
WHERE _CDB_IsOverviewTableOf((SELECT relname FROM pg_class WHERE oid=reloid), table_name)
ORDER BY z;
END
$$ LANGUAGE PLPGSQL;
-- Return existing overviews (if any) for multiple dataset tables.
-- Scope: public
-- Parameters
-- tables: Array of input tables oids
-- Return relation of overviews for the table with
-- the base table oid,
-- z level of the overview and overview table oid, ordered by z.
-- Note: CDB_Overviews can be applied to the result of CDB_QueryTablesText
-- to obtain the overviews applicable to a query.
CREATE OR REPLACE FUNCTION CDB_Overviews(tables regclass[])
RETURNS TABLE(base_table REGCLASS, z integer, overview_table REGCLASS)
AS $$
SELECT
base_table::regclass AS base_table,
_CDB_OverviewTableZ(table_name) AS z,
table_regclass AS overview_table
FROM
_CDB_UserTablesInSchema(), unnest(tables) base_table
WHERE
schema_name = _cdb_schema_name(base_table)
AND _CDB_IsOverviewTableOf((SELECT relname FROM pg_class WHERE oid=base_table), table_name)
ORDER BY base_table, z;
$$ LANGUAGE SQL;
-- Calculate the estimated extent of a cartodbfy'ed table.
-- Scope: private.
-- Parameters
-- reloid: oid of the input table.
-- Return value A box2d extent in 3857.
CREATE OR REPLACE FUNCTION _cdb_estimated_extent(reloid REGCLASS)
RETURNS box2d
AS $$
DECLARE
ext box2d;
ext_query text;
table_id record;
BEGIN
SELECT n.nspname AS schema_name, c.relname table_name INTO STRICT table_id
FROM pg_class c JOIN pg_namespace n on n.oid = c.relnamespace WHERE c.oid = reloid::oid;
ext_query = format(
'SELECT ST_EstimatedExtent(''%1$s'', ''%2$s'', ''%3$s'');',
table_id.schema_name, table_id.table_name, 'the_geom_webmercator'
);
BEGIN
EXECUTE ext_query INTO ext;
EXCEPTION
-- This is the typical ERROR: stats for "mytable" do not exist
WHEN internal_error THEN
-- Get stats and execute again
EXECUTE format('ANALYZE %1$s', reloid);
-- We check the geometry type in case the error is due to empty geometries
IF _CDB_GeometryTypes(reloid) IS NULL THEN
RETURN NULL;
END IF;
EXECUTE ext_query INTO ext;
END;
RETURN ext;
END;
$$ LANGUAGE PLPGSQL VOLATILE;
-- Determine the max feature density of a given dataset.
-- Scope: private.
-- Parameters
-- reloid: oid of the input table. It must be a cartodbfy'ed table.
-- nz: number of zoom levels to consider from z0 upward.
-- Return value: feature density (num_features / webmercator_squared_meters).
CREATE OR REPLACE FUNCTION _CDB_Feature_Density(reloid REGCLASS, nz integer)
RETURNS FLOAT8
AS $$
DECLARE
fd FLOAT8;
min_features TEXT;
n integer = 4;
c FLOAT8;
BEGIN
-- TODO: for small total count or extents we could just:
-- EXECUTE 'SELECT Count(*)/ST_Area(ST_Extent(the_geom_webmercator)) FROM ' || reloid::text || ';' INTO fd;
-- min_features is a SQL subexpression which can depend on z and represents
-- the minimum number of features to recursively consider a tile.
-- We can either use a fixed minimum number of features per tile
-- or a minimum feature density by dividing the number of features by
-- the area of tiles at level Z: c*c*power(2, -2*z)
-- with c = CDB_XYZ_Resolution(-8) (earth circumference)
min_features = '500';
SELECT CDB_XYZ_Resolution(-8) INTO c;
-- We first compute a set of *seed* tiles, of the minimum Z level, z0, such that
-- they cover the extent of the table and we have at least n of them in each
-- linear dimension (i.e. at least n*n tiles cover the extent).
-- We compute the number of features in these tiles, and recursively in
-- subtiles up to level z0 + nz. Then we compute the maximum of the feature
-- density (per tile area in webmercator squared meters) for all the
-- considered tiles.
EXECUTE Format('
WITH RECURSIVE t(x, y, z, e) AS (
WITH ext AS (SELECT _cdb_estimated_extent(%6$s) as g),
base AS (
SELECT
least(
-floor(log(2, (greatest(ST_XMax(ext.g)-ST_XMin(ext.g), ST_YMax(ext.g)-ST_YMin(ext.g))/(%4$s*%5$s))::numeric)),
_CDB_MaxOverviewLevel()+1
)::integer z
FROM ext
),
lim AS (
SELECT
FLOOR((ST_XMin(ext.g)+CDB_XYZ_Resolution(0)*128)/(CDB_XYZ_Resolution(base.z)*256))::integer x0,
FLOOR((ST_XMax(ext.g)+CDB_XYZ_Resolution(0)*128)/(CDB_XYZ_Resolution(base.z)*256))::integer x1,
FLOOR((CDB_XYZ_Resolution(0)*128-ST_YMin(ext.g))/(CDB_XYZ_Resolution(base.z)*256))::integer y1,
FLOOR((CDB_XYZ_Resolution(0)*128-ST_YMax(ext.g))/(CDB_XYZ_Resolution(base.z)*256))::integer y0
FROM ext, base
),
seed AS (
SELECT xt, yt, base.z, (
SELECT count(*) FROM %1$s
WHERE the_geom_webmercator && CDB_XYZ_Extent(xt, yt, base.z)
) e
FROM base, lim, generate_series(lim.x0, lim.x1) xt, generate_series(lim.y0, lim.y1) yt
)
SELECT * from seed
UNION ALL
SELECT x*2 + xx, y*2 + yy, t.z+1, (
SELECT count(*) FROM %1$s
WHERE the_geom_webmercator && CDB_XYZ_Extent(t.x*2 + c.xx, t.y*2 + c.yy, t.z+1)
)
FROM t, base, (VALUES (0, 0), (0, 1), (1, 1), (1, 0)) AS c(xx, yy)
WHERE t.e > %2$s AND t.z < least(base.z + %3$s, _CDB_MaxZoomLevel())
)
SELECT MAX(e/ST_Area(CDB_XYZ_Extent(x,y,z))) FROM t where e > 0;
', reloid::text, min_features, nz, n, c, reloid::oid)
INTO fd;
RETURN fd;
END
$$ LANGUAGE PLPGSQL STABLE;
-- Experimental default strategy to assign a reference base Z level
-- to a cartodbfied table. The resulting Z level represents the
-- minimum scale level at which the table data can be rendered
-- without overcrowded results or loss of detail.
-- Parameters:
-- reloid: oid of the input table. It must be a cartodbfy'ed table.
-- Return value: Z level as an integer
CREATE OR REPLACE FUNCTION _CDB_Feature_Density_Ref_Z_Strategy(reloid REGCLASS, tolerance_px FLOAT8 DEFAULT NULL)
RETURNS INTEGER
AS $$
DECLARE
lim FLOAT8;
nz integer := 4;
fd FLOAT8;
c FLOAT8;
BEGIN
IF (tolerance_px IS NULL) OR tolerance_px = 0 THEN
lim := 500;
ELSE
lim := floor(power(256/tolerance_px, 2))/2;
END IF;
-- Compute fd as an estimation of the (maximum) number
-- of features per unit of tile area (in webmercator squared meters)
SELECT _CDB_Feature_Density(reloid, nz) INTO fd;
-- lim maximum number of (desiderable) features per tile
-- we have c = 2*Pi*R = CDB_XYZ_Resolution(-8) (earth circumference)
-- ta(z): tile area = power(c*power(2,-z), 2) = c*c*power(2,-2*z)
-- => fd*ta(z) is the average number of features per tile at level z
-- find minimum z so that fd*ta(z) <= lim
-- compute a rough 'feature density' value
SELECT CDB_XYZ_Resolution(-8) INTO c;
RETURN least(_CDB_MaxOverviewLevel()+1, ceil(log(2.0, (c*c*fd/lim)::numeric)/2));
END;
$$ LANGUAGE PLPGSQL STABLE;
-- Overview table name for a given Z level and base dataset or overview table
-- Scope: private.
-- Parameters:
-- ref reference table (can be the base table of the dataset or an existing
-- overview) from which the overview is being generated.
-- ref_z Z level of the reference table
-- overview_z Z level of the overview to be named, must be smaller than ref_z
-- Return value: the name to be used for the overview. The name is always
-- unqualified (does not include a schema name).
CREATE OR REPLACE FUNCTION _CDB_Overview_Name(ref REGCLASS, ref_z INTEGER, overview_z INTEGER)
RETURNS TEXT
AS $$
DECLARE
schema_name TEXT;
base TEXT;
suffix TEXT;
is_overview BOOLEAN;
BEGIN
SELECT * FROM _cdb_split_table_name(ref) INTO schema_name, base;
SELECT _CDB_OverviewBaseTableName(base) INTO base;
RETURN _CDB_OverviewTableName(base, overview_z);
END
$$ LANGUAGE PLPGSQL IMMUTABLE;
-- Sampling reduction method.
-- Valid for any kind of geometry.
-- Scope: private.
-- reloid original table (can be the base table of the dataset or an existing
-- overview) from which the overview is being generated.
-- ref_z Z level assigned to the original table
-- overview_z Z level of the overview to be generated, must be smaller than ref_z
-- Return value: Name of the generated overview table
CREATE OR REPLACE FUNCTION _CDB_Sampling_Reduce_Strategy(reloid REGCLASS, ref_z INTEGER, overview_z INTEGER, tolerance_px FLOAT8 DEFAULT NULL, has_overview_created BOOLEAN DEFAULT FALSE)
RETURNS REGCLASS
AS $$
DECLARE
overview_rel TEXT;
fraction FLOAT8;
base_name TEXT;
class_info RECORD;
num_samples INTEGER;
schema_name TEXT;
table_name TEXT;
overview_table_name TEXT;
creation_clause TEXT;
BEGIN
overview_rel := _CDB_Overview_Name(reloid, ref_z, overview_z);
-- TODO: compute fraction from tolerance_px if not NULL
fraction := power(2, 2*(overview_z - ref_z));
SELECT * FROM _cdb_split_table_name(reloid) INTO schema_name, table_name;
overview_table_name := Format('%I.%I', schema_name, overview_rel);
IF has_overview_created THEN
EXECUTE Format('DELETE FROM %s;', overview_table_name);
creation_clause := Format('INSERT INTO %s', overview_table_name);
ELSE
creation_clause := Format('CREATE TABLE %s AS', overview_table_name);
END IF;
-- Estimate number of rows
SELECT reltuples, relpages FROM pg_class INTO STRICT class_info
WHERE oid = reloid::oid;
IF class_info.relpages < 2 OR fraction > 0.5 THEN
-- We'll avoid possible CDB_RandomTids problems
EXECUTE Format('
%s SELECT * FROM %s WHERE random() < %s;
', creation_clause, reloid, fraction);
ELSE
num_samples := ceil(class_info.reltuples*fraction);
EXECUTE Format('
%1$s SELECT * FROM %2$s
WHERE ctid = ANY (
ARRAY[
(SELECT CDB_RandomTids(''%2$s'', %3$s))
]
);
', creation_clause, reloid, num_samples);
END IF;
RETURN Format('%s', overview_table_name)::regclass;
END;
$$ LANGUAGE PLPGSQL;
-- Register new overview table (post-creation chores)
-- Scope: private
-- Parameters:
-- dataset: oid of the input dataset table, It must be a cartodbfy'ed table.
-- overview_table: oid of the overview table to be registered.
-- overview_z: intended Z level for the overview table
-- This function is declared SECURITY DEFINER so it executes with the privileges
-- of the function creator to have a chance to alter the privileges of the
-- overview table to match those of the dataset. It will only perform any change
-- if the overview table belgons to the same scheme as the dataset and it
-- matches the scheme naming for overview tables.
CREATE OR REPLACE FUNCTION _CDB_Register_Overview(dataset REGCLASS, overview_table REGCLASS, overview_z INTEGER)
RETURNS VOID
AS $$
DECLARE
sql TEXT;
table_owner TEXT;
dataset_scheme TEXT;
dataset_name TEXT;
overview_scheme TEXT;
overview_name TEXT;
BEGIN
-- This function will only register a table as an overview table if it matches
-- the overviews naming scheme for the dataset and z level and the table belongs
-- to the same scheme as the the dataset
SELECT * FROM _cdb_split_table_name(dataset) INTO dataset_scheme, dataset_name;
SELECT * FROM _cdb_split_table_name(overview_table) INTO overview_scheme, overview_name;
IF dataset_scheme = overview_scheme AND
overview_name = _CDB_OverviewTableName(dataset_name, overview_z) THEN
-- preserve the owner of the base table
SELECT u.usename
FROM pg_catalog.pg_class c
JOIN pg_catalog.pg_user u ON (c.relowner=u.usesysid)
JOIN pg_namespace n ON n.oid = c.relnamespace
WHERE c.relname = dataset_name::text AND n.nspname = dataset_scheme
INTO table_owner;
EXECUTE Format('ALTER TABLE IF EXISTS %s OWNER TO %I;', overview_table::text, table_owner);
-- preserve the table privileges
UPDATE pg_class c_to
SET relacl = c_from.relacl
FROM pg_class c_from
WHERE c_from.oid = dataset
AND c_to.oid = overview_table;
PERFORM _CDB_Add_Indexes(overview_table);
-- TODO: If metadata about existing overviews is to be stored
-- it should be done here (CDB_Overviews would consume such metadata)
END IF;
END
$$ LANGUAGE PLPGSQL SECURITY DEFINER;
-- Dataset attributes (column names other than the
-- CartoDB primary key and geometry columns) which should be aggregated
-- in aggregated overviews.
-- Scope: private.
-- Parameters
-- reloid: oid of the input table. It must be a cartodbfy'ed table.
-- Return value: set of attribute names
CREATE OR REPLACE FUNCTION _CDB_Aggregable_Attributes(reloid REGCLASS)
RETURNS SETOF information_schema.sql_identifier
AS $$
SELECT c FROM CDB_ColumnNames(reloid) c, _CDB_Columns() cdb
WHERE c NOT IN (
cdb.pkey, cdb.geomcol, cdb.mercgeomcol
)
$$ LANGUAGE SQL STABLE;
-- List of dataset attributes to be aggregated in aggregated overview
-- as a comma-separated SQL expression.
-- Scope: private.
-- Parameters
-- reloid: oid of the input table. It must be a cartodbfy'ed table.
-- Return value: SQL subexpression as text
CREATE OR REPLACE FUNCTION _CDB_Aggregable_Attributes_Expression(reloid REGCLASS)
RETURNS TEXT
AS $$
DECLARE
attr_list TEXT;
BEGIN
SELECT string_agg(s.c, ',') FROM (
SELECT * FROM _CDB_Aggregable_Attributes(reloid) c
) AS s INTO attr_list;
RETURN attr_list;
END
$$ LANGUAGE PLPGSQL STABLE;
-- Check if a column of a table is of an unlimited-length text type
CREATE OR REPLACE FUNCTION _cdb_unlimited_text_column(reloid REGCLASS, col_name TEXT)
RETURNS BOOLEAN
AS $$
SELECT EXISTS (
SELECT a.attname
FROM pg_class c
LEFT JOIN pg_attribute a ON a.attrelid = c.oid
LEFT JOIN pg_type t ON t.oid = a.atttypid
WHERE c.oid = reloid
AND a.attname = col_name
AND format_type(a.atttypid, NULL) IN ('text', 'character varying', 'character')
AND format_type(a.atttypid, NULL) = format_type(a.atttypid, a.atttypmod)
);
$$ LANGUAGE SQL STABLE;
CREATE OR REPLACE FUNCTION _cdb_categorical_column(reloid REGCLASS, col_name TEXT)
RETURNS BOOLEAN
AS $$
DECLARE
schema_name TEXT;
table_name TEXT;
available BOOLEAN;
categorical BOOLEAN;
BEGIN
SELECT * FROM _cdb_split_table_name(reloid) INTO schema_name, table_name;
SELECT n_distinct IS NOT NULL
FROM pg_stats
WHERE pg_stats.schemaname = schema_name
AND pg_stats.tablename = table_name
AND pg_stats.attname = col_name
INTO available;
IF available IS NULL OR NOT available THEN
EXECUTE Format('ANALYZE %s;', reloid);
END IF;
SELECT n_distinct > 0 AND n_distinct <= 20
FROM pg_stats
WHERE pg_stats.schemaname = schema_name
AND pg_stats.tablename = table_name
AND pg_stats.attname = col_name
INTO categorical;
RETURN categorical;
END;
$$ LANGUAGE PLPGSQL VOLATILE;
CREATE OR REPLACE FUNCTION _cdb_mode_of_array(anyarray)
RETURNS anyelement AS
$$
SELECT a
FROM unnest($1) a
GROUP BY 1
ORDER BY COUNT(1) DESC, 1
LIMIT 1;
$$
LANGUAGE SQL IMMUTABLE;
DROP AGGREGATE IF EXISTS _cdb_mode(anyelement);
CREATE AGGREGATE _cdb_mode(anyelement) (
SFUNC=array_append,
STYPE=anyarray,
FINALFUNC=_cdb_mode_of_array,
INITCOND='{}'
);
-- SQL Aggregation expression for a datase attribute
-- Scope: private.
-- Parameters
-- reloid: oid of the input table. It must be a cartodbfy'ed table.
-- column_name: column to be aggregated
-- table_alias: (optional) table qualifier for the column to be aggregated
-- Return SQL subexpression as text with aggregated attribute aliased
-- with its original name.
CREATE OR REPLACE FUNCTION _CDB_Attribute_Aggregation_Expression(reloid REGCLASS, column_name TEXT, table_alias TEXT DEFAULT '')
RETURNS TEXT
AS $$
DECLARE
column_type TEXT;
qualified_column TEXT;
has_counter_column BOOLEAN;
feature_count TEXT;
total_feature_count TEXT;
base_table REGCLASS;
BEGIN
IF table_alias <> '' THEN
qualified_column := Format('%I.%I', table_alias, column_name);
ELSE
qualified_column := Format('%I', column_name);
END IF;
column_type := CDB_ColumnType(reloid, column_name);
SELECT EXISTS (
SELECT * FROM CDB_ColumnNames(reloid) as colname WHERE colname = '_feature_count'
) INTO has_counter_column;
IF has_counter_column THEN
feature_count := '_feature_count';
total_feature_count := 'SUM(_feature_count)';
ELSE
feature_count := '1';
total_feature_count := 'count(*)';
END IF;
base_table := _CDB_OverviewBaseTable(reloid);
CASE column_type
WHEN 'double precision', 'real', 'integer', 'bigint', 'numeric' THEN
IF column_name = '_feature_count' THEN
RETURN 'SUM(_feature_count)';
ELSE
IF column_type = 'integer' AND _cdb_categorical_column(base_table, column_name) THEN
RETURN Format('CDB_Math_Mode(%s)::', qualified_column) || column_type;
ELSE
RETURN Format('SUM(%s*%s)/%s::' || column_type, qualified_column, feature_count, total_feature_count);
END IF;
END IF;
WHEN 'text', 'character varying', 'character' THEN
IF _cdb_categorical_column(base_table, column_name) THEN
RETURN Format('_cdb_mode(%s)::', qualified_column) || column_type;
ELSE
IF _cdb_unlimited_text_column(base_table, column_name) THEN
-- TODO: this should not be applied to columns containing largish text;
-- it is intended only to short names/identifiers
RETURN 'CASE WHEN count(distinct ' || qualified_column || ') = 1 THEN MIN(' || qualified_column || ') WHEN ' || total_feature_count || ' < 5 THEN string_agg(distinct ' || qualified_column || ','' / '') ELSE ''*'' END::' || column_type;
ELSE
RETURN 'CASE count(*) WHEN 1 THEN MIN(' || qualified_column || ') ELSE NULL END::' || column_type;
END IF;
END IF;
WHEN 'boolean' THEN
RETURN 'CASE count(*) WHEN 1 THEN BOOL_AND(' || qualified_column || ') ELSE NULL END::' || column_type;
ELSE
RETURN 'CASE count(*) WHEN 1 THEN MIN(' || qualified_column || ') ELSE NULL END::' || column_type;
END CASE;
END
$$ LANGUAGE PLPGSQL IMMUTABLE;
-- List of dataset aggregated attributes as a comma-separated SQL expression.
-- Scope: private.
-- Parameters
-- reloid: oid of the input table. It must be a cartodbfy'ed table.
-- table_alias: (optional) table qualifier for the columns to be aggregated
-- Return value: SQL subexpression as text
CREATE OR REPLACE FUNCTION _CDB_Aggregated_Attributes_Expression(reloid REGCLASS, table_alias TEXT DEFAULT '')
RETURNS TEXT
AS $$
DECLARE
attr_list TEXT;
BEGIN
SELECT string_agg(_CDB_Attribute_Aggregation_Expression(reloid, s.c, table_alias) || Format(' AS %s', s.c), ',')
FROM (
SELECT * FROM _CDB_Aggregable_Attributes(reloid) c
) AS s INTO attr_list;
RETURN attr_list;
END
$$ LANGUAGE PLPGSQL STABLE;
-- Array of geometry types detected in a cartodbfied table
-- For effciency only look at a limited number of rwos.
-- Parameters
-- reloid: oid of the input table. It must be a cartodbfy'ed table.
-- Return value: array of geometry type names
CREATE OR REPLACE FUNCTION _CDB_GeometryTypes(reloid REGCLASS)
RETURNS TEXT[]
AS $$
DECLARE
gtypes TEXT[];
BEGIN
EXECUTE Format('
SELECT array_agg(DISTINCT ST_GeometryType(the_geom)) FROM (
SELECT the_geom FROM %s
WHERE (the_geom is not null) LIMIT 10
) as geom_types
', reloid)
INTO gtypes;
RETURN gtypes;
END
$$ LANGUAGE PLPGSQL STABLE;
-- Experimental Overview reduction method for point datasets.
-- It clusters the points using a grid, then aggregates the point in each
-- cluster into a point at the centroid of the clustered records.
-- Scope: private.
-- Parameters:
-- reloid original table (can be the base table of the dataset or an existing
-- overview) from which the overview is being generated.
-- ref_z Z level assigned to the original table
-- overview_z Z level of the overview to be generated, must be smaller than ref_z
-- Return value: Name of the generated overview table
CREATE OR REPLACE FUNCTION _CDB_GridCluster_Reduce_Strategy(reloid REGCLASS, ref_z INTEGER, overview_z INTEGER, grid_px FLOAT8 DEFAULT NULL, has_overview_created BOOLEAN DEFAULT FALSE)
RETURNS REGCLASS
AS $$
DECLARE
overview_rel TEXT;
reduction FLOAT8;
base_name TEXT;
pixel_m FLOAT8;
grid_m FLOAT8;
offset_m FLOAT8;
offset_x TEXT;
offset_y TEXT;
cell_x TEXT;
cell_y TEXT;
aggr_attributes TEXT;
attributes TEXT;
columns TEXT;
gtypes TEXT[];
schema_name TEXT;
table_name TEXT;
point_geom TEXT;
overview_table_name TEXT;
creation_clause TEXT;
BEGIN
SELECT _CDB_GeometryTypes(reloid) INTO gtypes;
IF gtypes IS NULL OR array_upper(gtypes, 1) <> 1 OR gtypes[1] <> 'ST_Point' THEN
-- This strategy only supports datasets with point geomety
RETURN NULL;
END IF;
--TODO: check applicability: geometry type, minimum number of points...
overview_rel := _CDB_Overview_Name(reloid, ref_z, overview_z);
-- Grid size in pixels at Z level overview_z
IF grid_px IS NULL THEN
grid_px := 1.0;
END IF;
SELECT * FROM _cdb_split_table_name(reloid) INTO schema_name, table_name;
-- pixel_m: size of a pixel in webmercator units (meters)
SELECT CDB_XYZ_Resolution(overview_z) INTO pixel_m;
-- grid size in meters
grid_m = grid_px * pixel_m;
attributes := _CDB_Aggregable_Attributes_Expression(reloid);
aggr_attributes := _CDB_Aggregated_Attributes_Expression(reloid);
IF attributes <> '' THEN
attributes := ', ' || attributes;
END IF;
IF aggr_attributes <> '' THEN
aggr_attributes := aggr_attributes || ', ';
END IF;
-- Center of each cell:
cell_x := Format('gx*%1$s + %2$s', grid_m, grid_m/2);
cell_y := Format('gy*%1$s + %2$s', grid_m, grid_m/2);
-- Displacement to the nearest pixel center:
IF MOD(grid_px::numeric, 1.0::numeric) = 0 THEN
offset_m := pixel_m/2 - MOD((grid_m/2)::numeric, pixel_m::numeric)::float8;
offset_x := Format('%s', offset_m);
offset_y := Format('%s', offset_m);
ELSE
offset_x := Format('%2$s/2 - MOD((%1$s)::numeric, (%2$s)::numeric)::float8', cell_x, pixel_m);
offset_y := Format('%2$s/2 - MOD((%1$s)::numeric, (%2$s)::numeric)::float8', cell_y, pixel_m);
END IF;
point_geom := Format('ST_SetSRID(ST_MakePoint(%1$s + %3$s, %2$s + %4$s), 3857)', cell_x, cell_y, offset_x, offset_y);
-- compute the resulting columns in the same order as in the base table
WITH cols AS (
SELECT
CASE c
WHEN 'cartodb_id' THEN 'cartodb_id'
WHEN 'the_geom' THEN
Format('ST_Transform(%s, 4326) AS the_geom', point_geom)
WHEN 'the_geom_webmercator' THEN
Format('%s AS the_geom_webmercator', point_geom)
ELSE c
END AS column
FROM CDB_ColumnNames(reloid) c
)
SELECT string_agg(s.column, ',') FROM (
SELECT * FROM cols
) AS s INTO columns;
IF NOT columns LIKE '%_feature_count%' THEN
columns := columns || ', n AS _feature_count';
END IF;
overview_table_name := Format('%I.%I', schema_name, overview_rel);
IF has_overview_created THEN
RAISE INFO 'Deleting and inserting because % has overviews', overview_table_name;
EXECUTE Format('DELETE FROM %s;', overview_table_name);
creation_clause := Format('INSERT INTO %s', overview_table_name);
ELSE
RAISE INFO 'Creating a new table overview %', overview_table_name;
creation_clause := Format('CREATE TABLE %s AS', overview_table_name);
END IF;
-- Now we cluster the data using a grid of size grid_m
-- and selecte the centroid (average coordinates) of each cluster.
-- If we had a selected numeric attribute of interest we could use it
-- as a weight for the average coordinates.
EXECUTE Format('
%3$s
WITH clusters AS (
SELECT
%5$s
count(*) AS n,
Floor(ST_X(f.the_geom_webmercator)/%2$s)::int AS gx,
Floor(ST_Y(f.the_geom_webmercator)/%2$s)::int AS gy,
MIN(cartodb_id) AS cartodb_id
FROM %1$s f
WHERE f.the_geom_webmercator IS NOT NULL
GROUP BY gx, gy
)
SELECT %6$s FROM clusters
', reloid::text, grid_m, creation_clause, attributes, aggr_attributes, columns);
RETURN Format('%s', overview_table_name)::regclass;
END;
$$ LANGUAGE PLPGSQL;
-- This strategy places the aggregation of each cluster at the centroid of the cluster members.
CREATE OR REPLACE FUNCTION _CDB_GridClusterCentroid_Reduce_Strategy(reloid REGCLASS, ref_z INTEGER, overview_z INTEGER, grid_px FLOAT8 DEFAULT NULL, has_overview_created BOOLEAN DEFAULT FALSE)
RETURNS REGCLASS
AS $$
DECLARE
overview_rel TEXT;
reduction FLOAT8;
base_name TEXT;
pixel_m FLOAT8;
grid_m FLOAT8;
offset_m FLOAT8;
offset_x TEXT;
offset_y TEXT;
cell_x TEXT;
cell_y TEXT;
aggr_attributes TEXT;
attributes TEXT;
columns TEXT;
gtypes TEXT[];
schema_name TEXT;
table_name TEXT;
point_geom TEXT;
overview_table_name TEXT;
creation_clause TEXT;
BEGIN
SELECT _CDB_GeometryTypes(reloid) INTO gtypes;
IF gtypes IS NULL OR array_upper(gtypes, 1) <> 1 OR gtypes[1] <> 'ST_Point' THEN
-- This strategy only supports datasets with point geomety
RETURN NULL;
END IF;
--TODO: check applicability: geometry type, minimum number of points...
overview_rel := _CDB_Overview_Name(reloid, ref_z, overview_z);
-- Grid size in pixels at Z level overview_z
IF grid_px IS NULL THEN
grid_px := 1.0;
END IF;
SELECT * FROM _cdb_split_table_name(reloid) INTO schema_name, table_name;
-- pixel_m: size of a pixel in webmercator units (meters)
SELECT CDB_XYZ_Resolution(overview_z) INTO pixel_m;
-- grid size in meters
grid_m = grid_px * pixel_m;
attributes := _CDB_Aggregable_Attributes_Expression(reloid);
aggr_attributes := _CDB_Aggregated_Attributes_Expression(reloid);
IF attributes <> '' THEN
attributes := ', ' || attributes;
END IF;
IF aggr_attributes <> '' THEN
aggr_attributes := aggr_attributes || ', ';
END IF;
-- Center of each cell:
cell_x := Format('gx*%1$s + %2$s', grid_m, grid_m/2);
cell_y := Format('gy*%1$s + %2$s', grid_m, grid_m/2);
-- Displacement to the nearest pixel center:
IF MOD(grid_px::numeric, 1.0::numeric) = 0 THEN
offset_m := pixel_m/2 - MOD((grid_m/2)::numeric, pixel_m::numeric)::float8;
offset_x := Format('%s', offset_m);
offset_y := Format('%s', offset_m);
ELSE
offset_x := Format('%2$s/2 - MOD((%1$s)::numeric, (%2$s)::numeric)::float8', cell_x, pixel_m);
offset_y := Format('%2$s/2 - MOD((%1$s)::numeric, (%2$s)::numeric)::float8', cell_y, pixel_m);
END IF;
point_geom := Format('ST_SetSRID(ST_MakePoint(%1$s + %3$s, %2$s + %4$s), 3857)', cell_x, cell_y, offset_x, offset_y);
-- compute the resulting columns in the same order as in the base table
WITH cols AS (
SELECT
CASE c
WHEN 'cartodb_id' THEN 'cartodb_id'
WHEN 'the_geom' THEN
'ST_Transform(ST_SetSRID(ST_MakePoint(_sum_of_x/n, _sum_of_y/n), 3857), 4326) AS the_geom'
WHEN 'the_geom_webmercator' THEN
'ST_SetSRID(ST_MakePoint(_sum_of_x/n, _sum_of_y/n), 3857) AS the_geom_webmercator'
ELSE c
END AS column
FROM CDB_ColumnNames(reloid) c
)
SELECT string_agg(s.column, ',') FROM (
SELECT * FROM cols
) AS s INTO columns;
IF NOT columns LIKE '%_feature_count%' THEN
columns := columns || ', n AS _feature_count';
END IF;
overview_table_name := Format('%I.%I', schema_name, overview_rel);
IF has_overview_created THEN
EXECUTE Format('DELETE FROM %s;', overview_table_name);
creation_clause := Format('INSERT INTO %s', overview_table_name);
ELSE
creation_clause := Format('CREATE TABLE %s AS', overview_table_name);
END IF;
-- Now we cluster the data using a grid of size grid_m
-- and selecte the centroid (average coordinates) of each cluster.
-- If we had a selected numeric attribute of interest we could use it
-- as a weight for the average coordinates.
EXECUTE Format('
%3$s
WITH clusters AS (
SELECT
%5$s
count(*) AS n,
SUM(ST_X(f.the_geom_webmercator)) AS _sum_of_x,
SUM(ST_Y(f.the_geom_webmercator)) AS _sum_of_y,
Floor(ST_Y(f.the_geom_webmercator)/%2$s)::int AS gy,
Floor(ST_X(f.the_geom_webmercator)/%2$s)::int AS gx,
MIN(cartodb_id) AS cartodb_id
FROM %1$s f
GROUP BY gx, gy
)
SELECT %6$s FROM clusters
', reloid::text, grid_m, creation_clause, attributes, aggr_attributes, columns);
RETURN Format('%s', overview_table_name)::regclass;
END;
$$ LANGUAGE PLPGSQL;
-- This strategy places the aggregation of each cluster at the position of one of the cluster members.
CREATE OR REPLACE FUNCTION _CDB_GridClusterSample_Reduce_Strategy(reloid REGCLASS, ref_z INTEGER, overview_z INTEGER, grid_px FLOAT8 DEFAULT NULL, has_overview_created BOOLEAN DEFAULT FALSE)
RETURNS REGCLASS
AS $$
DECLARE
overview_rel TEXT;
reduction FLOAT8;
base_name TEXT;
pixel_m FLOAT8;
grid_m FLOAT8;
offset_m FLOAT8;
offset_x TEXT;
offset_y TEXT;
cell_x TEXT;
cell_y TEXT;
aggr_attributes TEXT;
attributes TEXT;
columns TEXT;
gtypes TEXT[];
schema_name TEXT;
table_name TEXT;
point_geom TEXT;
overview_table_name TEXT;
creation_clause TEXT;
BEGIN
SELECT _CDB_GeometryTypes(reloid) INTO gtypes;
IF gtypes IS NULL OR array_upper(gtypes, 1) <> 1 OR gtypes[1] <> 'ST_Point' THEN
-- This strategy only supports datasets with point geomety
RETURN NULL;
END IF;
--TODO: check applicability: geometry type, minimum number of points...
overview_rel := _CDB_Overview_Name(reloid, ref_z, overview_z);
-- Grid size in pixels at Z level overview_z
IF grid_px IS NULL THEN
grid_px := 1.0;
END IF;
SELECT * FROM _cdb_split_table_name(reloid) INTO schema_name, table_name;
-- pixel_m: size of a pixel in webmercator units (meters)
SELECT CDB_XYZ_Resolution(overview_z) INTO pixel_m;
-- grid size in meters
grid_m = grid_px * pixel_m;
attributes := _CDB_Aggregable_Attributes_Expression(reloid);
aggr_attributes := _CDB_Aggregated_Attributes_Expression(reloid);
IF attributes <> '' THEN
attributes := ', ' || attributes;
END IF;
IF aggr_attributes <> '' THEN
aggr_attributes := aggr_attributes || ', ';
END IF;
-- Center of each cell:
cell_x := Format('gx*%1$s + %2$s', grid_m, grid_m/2);
cell_y := Format('gy*%1$s + %2$s', grid_m, grid_m/2);
-- Displacement to the nearest pixel center:
IF MOD(grid_px::numeric, 1.0::numeric) = 0 THEN
offset_m := pixel_m/2 - MOD((grid_m/2)::numeric, pixel_m::numeric)::float8;
offset_x := Format('%s', offset_m);
offset_y := Format('%s', offset_m);
ELSE
offset_x := Format('%2$s/2 - MOD((%1$s)::numeric, (%2$s)::numeric)::float8', cell_x, pixel_m);
offset_y := Format('%2$s/2 - MOD((%1$s)::numeric, (%2$s)::numeric)::float8', cell_y, pixel_m);
END IF;
point_geom := Format('ST_SetSRID(ST_MakePoint(%1$s + %3$s, %2$s + %4$s), 3857)', cell_x, cell_y, offset_x, offset_y);
-- compute the resulting columns in the same order as in the base table
WITH cols AS (
SELECT
CASE c
WHEN 'cartodb_id' THEN 'cartodb_id'
ELSE c
END AS column
FROM CDB_ColumnNames(reloid) c
)
SELECT string_agg(s.column, ',') FROM (
SELECT * FROM cols
) AS s INTO columns;
IF NOT columns LIKE '%_feature_count%' THEN
columns := columns || ', n AS _feature_count';
END IF;
overview_table_name := Format('%I.%I', schema_name, overview_rel);
IF has_overview_created THEN
EXECUTE Format('DELETE FROM %s;', overview_table_name);
creation_clause := Format('INSERT INTO %s', overview_table_name);
ELSE
creation_clause := Format('CREATE TABLE %s AS', overview_table_name);
END IF;
-- Now we cluster the data using a grid of size grid_m
-- and select the centroid (average coordinates) of each cluster.
-- If we had a selected numeric attribute of interest we could use it
-- as a weight for the average coordinates.
EXECUTE Format('
%3$s
WITH clusters AS (
SELECT
%5$s
count(*) AS n,
Floor(ST_X(_f.the_geom_webmercator)/%2$s)::int AS gx,
Floor(ST_Y(_f.the_geom_webmercator)/%2$s)::int AS gy,
MIN(cartodb_id) AS cartodb_id
FROM %1$s _f
GROUP BY gx, gy
),
cluster_geom AS (
SELECT the_geom, the_geom_webmercator, clusters.*
FROM clusters INNER JOIN %1$s _g ON (clusters.cartodb_id = _g.cartodb_id)
)
SELECT %6$s FROM cluster_geom
', reloid::text, grid_m, creation_clause, attributes, aggr_attributes, columns);
RETURN Format('%s', overview_table_name)::regclass;
END;
$$ LANGUAGE PLPGSQL;
-- Create overview tables for a dataset.
-- Scope: public
-- Parameters:
-- reloid: oid of the input table. It must be a cartodbfy'ed table with
-- vector features.
-- refscale_strategy: function that computes the reference Z of the dataset
-- reduce_strategy: function that generates overviews from a base table
-- or higher level overview. The overview tables
-- created by the strategy must have the same columns
-- as the base table and in the same order.
-- Return value: Array with the names of the generated overview tables
CREATE OR REPLACE FUNCTION CDB_CreateOverviews(reloid REGCLASS, refscale_strategy regproc DEFAULT '_CDB_Feature_Density_Ref_Z_Strategy(REGCLASS,FLOAT8)'::regprocedure, reduce_strategy regproc DEFAULT '_CDB_GridCluster_Reduce_Strategy(REGCLASS,INTEGER,INTEGER,FLOAT8,BOOLEAN)'::regprocedure)
RETURNS text[]
AS $$
DECLARE
tolerance_px FLOAT8;
BEGIN
-- Use the default tolerance
tolerance_px := 1.0;
RETURN CDB_CreateOverviewsWithToleranceInPixels(reloid, tolerance_px, refscale_strategy, reduce_strategy);
END;
$$ LANGUAGE PLPGSQL;
-- Create overviews with additional parameter to define the desired detail/tolerance in pixels
CREATE OR REPLACE FUNCTION CDB_CreateOverviewsWithToleranceInPixels(reloid REGCLASS, tolerance_px FLOAT8, refscale_strategy regproc DEFAULT '_CDB_Feature_Density_Ref_Z_Strategy(REGCLASS,FLOAT8)'::regprocedure, reduce_strategy regproc DEFAULT '_CDB_GridCluster_Reduce_Strategy(REGCLASS,INTEGER,INTEGER,FLOAT8,BOOLEAN)'::regprocedure)
RETURNS text[]
AS $$
DECLARE
ref_z integer;
overviews_z integer[];
base_z integer;
base_rel REGCLASS;
overview_z integer;
overview_tables REGCLASS[];
overviews_step integer := 1;
has_counter_column boolean;
has_overviews_for_z boolean;
BEGIN
-- Determine the referece zoom level
EXECUTE 'SELECT ' || quote_ident(refscale_strategy::text) || Format('(''%s'', %s);', reloid, tolerance_px) INTO ref_z;
IF ref_z < 0 OR ref_z IS NULL THEN
RETURN NULL;
END IF;
-- Determine overlay zoom levels
-- TODO: should be handled by the refscale_strategy?
overview_z := ref_z - 1;
WHILE overview_z >= 0 LOOP
SELECT array_append(overviews_z, overview_z) INTO overviews_z;
overview_z := overview_z - overviews_step;
END LOOP;
-- TODO Get the diff between existing overviews and new overviews we're going to create
-- FOr example we have overviews until zoom level 10 and we add from lvl 11-16 so
-- that new overviews should be created and registered. This should be take into
-- account otherwise whe we remove zoom levels from the overviews but this is a tricky
-- case because the old query could be using that tables and we can provoke a DeadLock
-- Create overlay tables
base_z := ref_z;
base_rel := reloid;
FOREACH overview_z IN ARRAY overviews_z LOOP
SELECT CASE WHEN count(*) > 0 THEN TRUE ELSE FALSE END from CDB_Overviews(reloid) WHERE z = overview_z INTO has_overviews_for_z;
EXECUTE 'SELECT ' || quote_ident(reduce_strategy::text) || Format('(''%s'', %s, %s, %s, ''%s'');', base_rel, base_z, overview_z, tolerance_px, has_overviews_for_z) INTO base_rel;
IF base_rel IS NULL THEN
EXIT;
END IF;
base_z := overview_z;
IF NOT has_overviews_for_z THEN
RAISE INFO 'Registering overview: %', base_rel;
PERFORM _CDB_Register_Overview(reloid, base_rel, base_z);
END IF;
SELECT array_append(overview_tables, base_rel) INTO overview_tables;
END LOOP;
IF overview_tables IS NOT NULL AND array_length(overview_tables, 1) > 0 THEN
SELECT EXISTS (
SELECT * FROM CDB_ColumnNames(reloid) as colname WHERE colname = '_feature_count'
) INTO has_counter_column;
IF NOT has_counter_column THEN
EXECUTE Format('
ALTER TABLE %s ADD COLUMN _feature_count integer DEFAULT 1;
', reloid);
END IF;
END IF;
RETURN overview_tables;
END;
$$ LANGUAGE PLPGSQL;
-- Here are some older signatures of these functions, no longer in use.
-- They must be droped here, after the (new) definition of the function `CDB_CreateOverviews`
-- because that function used to contain references to them in the default argument values.
DROP FUNCTION IF EXISTS _CDB_Feature_Density_Ref_Z_Strategy(REGCLASS);
DROP FUNCTION IF EXISTS _CDB_GridCluster_Reduce_Strategy(REGCLASS,INTEGER,INTEGER);
DROP FUNCTION IF EXISTS _CDB_GridCluster_Reduce_Strategy(REGCLASS,INTEGER,INTEGER,FLOAT8);
DROP FUNCTION IF EXISTS _CDB_GridClusterCentroid_Reduce_Strategy(REGCLASS, INTEGER, INTEGER, FLOAT8);
DROP FUNCTION IF EXISTS _CDB_GridClusterSample_Reduce_Strategy(REGCLASS, INTEGER, INTEGER, FLOAT8);
DROP FUNCTION IF EXISTS _CDB_Sampling_Reduce_Strategy(REGCLASS,INTEGER,INTEGER);
DROP FUNCTION IF EXISTS _CDB_Sampling_Reduce_Strategy(REGCLASS,INTEGER,INTEGER,FLOAT8);