2016-01-12 23:20:11 +08:00
-- Remove a dataset's existing overview tables.
-- Scope: public
-- Parameters:
-- reloid: oid of the table.
2016-01-12 22:35:50 +08:00
CREATE OR REPLACE FUNCTION CDB_DropOverviews ( reloid REGCLASS )
RETURNS void
AS $ $
DECLARE
2016-04-04 23:45:20 +08:00
row record ;
schema_name TEXT ;
table_name TEXT ;
2016-01-12 22:35:50 +08:00
BEGIN
2016-04-04 23:45:20 +08:00
SELECT * FROM _cdb_split_table_name ( reloid ) INTO schema_name , table_name ;
2016-01-12 22:35:50 +08:00
FOR row IN
SELECT * FROM CDB_Overviews ( reloid )
LOOP
2016-04-07 00:57:52 +08:00
EXECUTE Format ( ' DROP TABLE %s; ' , row . overview_table ) ;
2016-01-12 22:35:50 +08:00
RAISE NOTICE ' Dropped overview for level %: % ' , row . z , row . overview_table ;
END LOOP ;
END ;
2017-10-24 20:16:56 +08:00
$ $ LANGUAGE PLPGSQL VOLATILE PARALLEL UNSAFE ;
2016-01-12 22:35:50 +08:00
2016-01-26 17:39:26 +08:00
2016-01-11 21:50:10 +08:00
-- Return existing overviews (if any) for a given dataset table
-- Scope: public
-- Parameters
-- reloid: oid of the input table.
-- Return relation of overviews for the table with
2016-01-26 17:39:26 +08:00
-- the base table oid,
-- z level of the overview and overview table oid, ordered by z.
2016-01-11 21:50:10 +08:00
CREATE OR REPLACE FUNCTION CDB_Overviews ( reloid REGCLASS )
2016-01-26 17:39:26 +08:00
RETURNS TABLE ( base_table REGCLASS , z integer , overview_table REGCLASS )
2016-01-11 21:50:10 +08:00
AS $ $
2016-04-04 23:02:59 +08:00
DECLARE
schema_name TEXT ;
2016-04-07 16:24:02 +08:00
base_table_name TEXT ;
2016-04-04 23:02:59 +08:00
BEGIN
2016-04-07 16:24:02 +08:00
SELECT * FROM _cdb_split_table_name ( reloid ) INTO schema_name , base_table_name ;
2016-04-04 23:02:59 +08:00
RETURN QUERY SELECT
reloid AS base_table ,
2016-04-07 06:07:45 +08:00
_CDB_OverviewTableZ ( table_name ) AS z ,
table_regclass AS overview_table
FROM _CDB_UserTablesInSchema ( schema_name )
WHERE _CDB_IsOverviewTableOf ( ( SELECT relname FROM pg_class WHERE oid = reloid ) , table_name )
2016-04-04 23:02:59 +08:00
ORDER BY z ;
END
2017-10-24 20:16:56 +08:00
$ $ LANGUAGE PLPGSQL STABLE PARALLEL RESTRICTED ;
2016-01-11 21:50:10 +08:00
2016-01-26 17:39:26 +08:00
-- Return existing overviews (if any) for multiple dataset tables.
-- Scope: public
-- Parameters
-- tables: Array of input tables oids
-- Return relation of overviews for the table with
-- the base table oid,
-- z level of the overview and overview table oid, ordered by z.
-- Note: CDB_Overviews can be applied to the result of CDB_QueryTablesText
-- to obtain the overviews applicable to a query.
CREATE OR REPLACE FUNCTION CDB_Overviews ( tables regclass [ ] )
RETURNS TABLE ( base_table REGCLASS , z integer , overview_table REGCLASS )
AS $ $
SELECT
2016-01-26 20:20:28 +08:00
base_table : : regclass AS base_table ,
2016-04-07 06:07:45 +08:00
_CDB_OverviewTableZ ( table_name ) AS z ,
table_regclass AS overview_table
2016-01-26 17:39:26 +08:00
FROM
2016-04-07 06:07:45 +08:00
_CDB_UserTablesInSchema ( ) , unnest ( tables ) base_table
WHERE
schema_name = _cdb_schema_name ( base_table )
AND _CDB_IsOverviewTableOf ( ( SELECT relname FROM pg_class WHERE oid = base_table ) , table_name )
2016-01-26 17:39:26 +08:00
ORDER BY base_table , z ;
2017-10-24 20:16:56 +08:00
$ $ LANGUAGE SQL STABLE PARALLEL SAFE ;
2016-01-26 17:39:26 +08:00
2015-12-22 00:41:28 +08:00
-- Calculate the estimated extent of a cartodbfy'ed table.
-- Scope: private.
-- Parameters
-- reloid: oid of the input table.
-- Return value A box2d extent in 3857.
CREATE OR REPLACE FUNCTION _cdb_estimated_extent ( reloid REGCLASS )
RETURNS box2d
AS $ $
DECLARE
ext box2d ;
ext_query text ;
table_id record ;
BEGIN
SELECT n . nspname AS schema_name , c . relname table_name INTO STRICT table_id
FROM pg_class c JOIN pg_namespace n on n . oid = c . relnamespace WHERE c . oid = reloid : : oid ;
ext_query = format (
2016-05-09 17:54:56 +08:00
' SELECT ST_EstimatedExtent( '' %1$s '' , '' %2$s '' , '' %3$s '' ); ' ,
2015-12-22 00:41:28 +08:00
table_id . schema_name , table_id . table_name , ' the_geom_webmercator '
) ;
BEGIN
EXECUTE ext_query INTO ext ;
2016-04-13 23:51:30 +08:00
EXCEPTION
2015-12-22 00:41:28 +08:00
-- This is the typical ERROR: stats for "mytable" do not exist
WHEN internal_error THEN
-- Get stats and execute again
2016-04-07 13:07:20 +08:00
EXECUTE format ( ' ANALYZE %1$s ' , reloid ) ;
2016-04-13 23:49:38 +08:00
-- We check the geometry type in case the error is due to empty geometries
IF _CDB_GeometryTypes ( reloid ) IS NULL THEN
RETURN NULL ;
END IF ;
2015-12-22 00:41:28 +08:00
EXECUTE ext_query INTO ext ;
END ;
RETURN ext ;
END ;
2017-10-24 20:16:56 +08:00
$ $ LANGUAGE PLPGSQL VOLATILE PARALLEL UNSAFE ;
2015-12-22 00:41:28 +08:00
2015-12-21 20:16:57 +08:00
-- Determine the max feature density of a given dataset.
-- Scope: private.
-- Parameters
-- reloid: oid of the input table. It must be a cartodbfy'ed table.
-- nz: number of zoom levels to consider from z0 upward.
-- Return value: feature density (num_features / webmercator_squared_meters).
2015-12-16 23:39:38 +08:00
CREATE OR REPLACE FUNCTION _CDB_Feature_Density ( reloid REGCLASS , nz integer )
2015-12-16 19:14:37 +08:00
RETURNS FLOAT8
AS $ $
DECLARE
fd FLOAT8 ;
min_features TEXT ;
2015-12-16 23:39:38 +08:00
n integer = 4 ;
c FLOAT8 ;
2015-12-16 19:14:37 +08:00
BEGIN
-- TODO: for small total count or extents we could just:
-- EXECUTE 'SELECT Count(*)/ST_Area(ST_Extent(the_geom_webmercator)) FROM ' || reloid::text || ';' INTO fd;
-- min_features is a SQL subexpression which can depend on z and represents
-- the minimum number of features to recursively consider a tile.
-- We can either use a fixed minimum number of features per tile
-- or a minimum feature density by dividing the number of features by
-- the area of tiles at level Z: c*c*power(2, -2*z)
-- with c = CDB_XYZ_Resolution(-8) (earth circumference)
min_features = ' 500 ' ;
2015-12-16 23:39:38 +08:00
SELECT CDB_XYZ_Resolution ( - 8 ) INTO c ;
2015-12-16 19:14:37 +08:00
2015-12-16 23:39:38 +08:00
-- We first compute a set of *seed* tiles, of the minimum Z level, z0, such that
-- they cover the extent of the table and we have at least n of them in each
-- linear dimension (i.e. at least n*n tiles cover the extent).
-- We compute the number of features in these tiles, and recursively in
-- subtiles up to level z0 + nz. Then we compute the maximum of the feature
-- density (per tile area in webmercator squared meters) for all the
-- considered tiles.
2015-12-16 19:14:37 +08:00
EXECUTE Format ( '
WITH RECURSIVE t ( x , y , z , e ) AS (
2015-12-22 01:34:26 +08:00
WITH ext AS ( SELECT _cdb_estimated_extent ( % 6 $ s ) as g ) ,
2015-12-16 23:39:38 +08:00
base AS (
2016-05-25 20:00:55 +08:00
SELECT
least (
- floor ( log ( 2 , ( greatest ( ST_XMax ( ext . g ) - ST_XMin ( ext . g ) , ST_YMax ( ext . g ) - ST_YMin ( ext . g ) ) / ( % 4 $ s * % 5 $ s ) ) : : numeric ) ) ,
_CDB_MaxOverviewLevel ( ) + 1
) : : integer z
2015-12-16 23:39:38 +08:00
FROM ext
) ,
lim AS (
SELECT
FLOOR ( ( ST_XMin ( ext . g ) + CDB_XYZ_Resolution ( 0 ) * 128 ) / ( CDB_XYZ_Resolution ( base . z ) * 256 ) ) : : integer x0 ,
FLOOR ( ( ST_XMax ( ext . g ) + CDB_XYZ_Resolution ( 0 ) * 128 ) / ( CDB_XYZ_Resolution ( base . z ) * 256 ) ) : : integer x1 ,
FLOOR ( ( CDB_XYZ_Resolution ( 0 ) * 128 - ST_YMin ( ext . g ) ) / ( CDB_XYZ_Resolution ( base . z ) * 256 ) ) : : integer y1 ,
FLOOR ( ( CDB_XYZ_Resolution ( 0 ) * 128 - ST_YMax ( ext . g ) ) / ( CDB_XYZ_Resolution ( base . z ) * 256 ) ) : : integer y0
FROM ext , base
) ,
seed AS (
2016-05-25 20:00:55 +08:00
SELECT xt , yt , base . z , (
2015-12-16 23:39:38 +08:00
SELECT count ( * ) FROM % 1 $ s
WHERE the_geom_webmercator & & CDB_XYZ_Extent ( xt , yt , base . z )
) e
FROM base , lim , generate_series ( lim . x0 , lim . x1 ) xt , generate_series ( lim . y0 , lim . y1 ) yt
2015-12-16 19:14:37 +08:00
)
2015-12-16 23:39:38 +08:00
SELECT * from seed
2015-12-16 19:14:37 +08:00
UNION ALL
2015-12-16 23:39:38 +08:00
SELECT x * 2 + xx , y * 2 + yy , t . z + 1 , (
2015-12-16 19:14:37 +08:00
SELECT count ( * ) FROM % 1 $ s
2016-05-27 00:25:11 +08:00
WHERE the_geom_webmercator & & CDB_XYZ_Extent ( t . x * 2 + c . xx , t . y * 2 + c . yy , t . z + 1 )
2015-12-16 19:14:37 +08:00
)
2015-12-16 23:39:38 +08:00
FROM t , base , ( VALUES ( 0 , 0 ) , ( 0 , 1 ) , ( 1 , 1 ) , ( 1 , 0 ) ) AS c ( xx , yy )
2016-05-25 20:00:55 +08:00
WHERE t . e > % 2 $ s AND t . z < least ( base . z + % 3 $ s , _CDB_MaxZoomLevel ( ) )
2015-12-16 19:14:37 +08:00
)
SELECT MAX ( e / ST_Area ( CDB_XYZ_Extent ( x , y , z ) ) ) FROM t where e > 0 ;
2015-12-22 01:34:26 +08:00
' , reloid::text, min_features, nz, n, c, reloid::oid)
2015-12-16 19:14:37 +08:00
INTO fd ;
RETURN fd ;
END
2017-10-24 20:16:56 +08:00
$ $ LANGUAGE PLPGSQL VOLATILE PARALLEL UNSAFE ;
2015-12-16 19:14:37 +08:00
2015-12-23 19:42:40 +08:00
-- Experimental default strategy to assign a reference base Z level
-- to a cartodbfied table. The resulting Z level represents the
-- minimum scale level at which the table data can be rendered
-- without overcrowded results or loss of detail.
-- Parameters:
-- reloid: oid of the input table. It must be a cartodbfy'ed table.
-- Return value: Z level as an integer
2016-04-05 01:21:10 +08:00
CREATE OR REPLACE FUNCTION _CDB_Feature_Density_Ref_Z_Strategy ( reloid REGCLASS , tolerance_px FLOAT8 DEFAULT NULL )
2015-12-16 00:36:27 +08:00
RETURNS INTEGER
AS $ $
2015-12-16 02:12:48 +08:00
DECLARE
2016-04-05 01:21:10 +08:00
lim FLOAT8 ;
2015-12-16 23:39:38 +08:00
nz integer : = 4 ;
2015-12-16 02:12:48 +08:00
fd FLOAT8 ;
c FLOAT8 ;
2015-12-16 00:36:27 +08:00
BEGIN
2016-04-05 01:21:10 +08:00
IF ( tolerance_px IS NULL ) OR tolerance_px = 0 THEN
lim : = 500 ;
ELSE
lim : = floor ( power ( 256 / tolerance_px , 2 ) ) / 2 ;
END IF ;
2015-12-16 19:14:37 +08:00
-- Compute fd as an estimation of the (maximum) number
-- of features per unit of tile area (in webmercator squared meters)
2015-12-16 23:39:38 +08:00
SELECT _CDB_Feature_Density ( reloid , nz ) INTO fd ;
2015-12-16 02:12:48 +08:00
-- lim maximum number of (desiderable) features per tile
-- we have c = 2*Pi*R = CDB_XYZ_Resolution(-8) (earth circumference)
2016-04-04 23:45:20 +08:00
-- ta(z): tile area = power(c*power(2,-z), 2) = c*c*power(2,-2*z)
-- => fd*ta(z) is the average number of features per tile at level z
2015-12-16 02:12:48 +08:00
-- find minimum z so that fd*ta(z) <= lim
-- compute a rough 'feature density' value
SELECT CDB_XYZ_Resolution ( - 8 ) INTO c ;
2016-05-25 20:00:55 +08:00
RETURN least ( _CDB_MaxOverviewLevel ( ) + 1 , ceil ( log ( 2 . 0 , ( c * c * fd / lim ) : : numeric ) / 2 ) ) ;
2015-12-16 00:36:27 +08:00
END ;
2017-10-24 20:16:56 +08:00
$ $ LANGUAGE PLPGSQL VOLATILE PARALLEL UNSAFE ;
2015-12-16 00:36:27 +08:00
2015-12-23 19:42:40 +08:00
-- Overview table name for a given Z level and base dataset or overview table
-- Scope: private.
-- Parameters:
-- ref reference table (can be the base table of the dataset or an existing
-- overview) from which the overview is being generated.
-- ref_z Z level of the reference table
-- overview_z Z level of the overview to be named, must be smaller than ref_z
2016-01-14 01:49:27 +08:00
-- Return value: the name to be used for the overview. The name is always
-- unqualified (does not include a schema name).
2015-12-16 00:36:27 +08:00
CREATE OR REPLACE FUNCTION _CDB_Overview_Name ( ref REGCLASS , ref_z INTEGER , overview_z INTEGER )
RETURNS TEXT
AS $ $
DECLARE
2016-01-14 01:49:27 +08:00
schema_name TEXT ;
2015-12-16 00:36:27 +08:00
base TEXT ;
suffix TEXT ;
is_overview BOOLEAN ;
BEGIN
2016-01-14 01:49:27 +08:00
SELECT * FROM _cdb_split_table_name ( ref ) INTO schema_name , base ;
2016-01-26 20:20:28 +08:00
SELECT _CDB_OverviewBaseTableName ( base ) INTO base ;
RETURN _CDB_OverviewTableName ( base , overview_z ) ;
2015-12-16 00:36:27 +08:00
END
2017-10-24 20:16:56 +08:00
$ $ LANGUAGE PLPGSQL IMMUTABLE PARALLEL SAFE ;
2015-12-16 00:36:27 +08:00
2015-12-29 02:28:20 +08:00
-- Sampling reduction method.
-- Valid for any kind of geometry.
2015-12-23 19:42:40 +08:00
-- Scope: private.
-- reloid original table (can be the base table of the dataset or an existing
-- overview) from which the overview is being generated.
-- ref_z Z level assigned to the original table
-- overview_z Z level of the overview to be generated, must be smaller than ref_z
-- Return value: Name of the generated overview table
2017-06-02 19:06:55 +08:00
CREATE OR REPLACE FUNCTION _CDB_Sampling_Reduce_Strategy ( reloid REGCLASS , ref_z INTEGER , overview_z INTEGER , tolerance_px FLOAT8 DEFAULT NULL , has_overview_created BOOLEAN DEFAULT FALSE )
2015-12-16 00:36:27 +08:00
RETURNS REGCLASS
AS $ $
DECLARE
overview_rel TEXT ;
2015-12-29 02:41:14 +08:00
fraction FLOAT8 ;
2015-12-16 00:36:27 +08:00
base_name TEXT ;
2015-12-29 02:41:14 +08:00
class_info RECORD ;
2015-12-29 02:28:20 +08:00
num_samples INTEGER ;
2016-04-04 23:45:20 +08:00
schema_name TEXT ;
table_name TEXT ;
2017-06-02 19:06:55 +08:00
overview_table_name TEXT ;
creation_clause TEXT ;
2015-12-16 00:36:27 +08:00
BEGIN
overview_rel : = _CDB_Overview_Name ( reloid , ref_z , overview_z ) ;
2016-04-05 01:21:10 +08:00
-- TODO: compute fraction from tolerance_px if not NULL
2015-12-29 02:41:14 +08:00
fraction : = power ( 2 , 2 * ( overview_z - ref_z ) ) ;
2015-12-29 02:28:20 +08:00
2016-04-04 23:45:20 +08:00
SELECT * FROM _cdb_split_table_name ( reloid ) INTO schema_name , table_name ;
2017-06-02 19:06:55 +08:00
overview_table_name : = Format ( ' %I.%I ' , schema_name , overview_rel ) ;
IF has_overview_created THEN
2017-06-02 21:21:38 +08:00
RAISE NOTICE ' Sampling reduce stategy deleting and inserting because % has overviews ' , overview_table_name ;
2017-06-02 19:06:55 +08:00
EXECUTE Format ( ' DELETE FROM %s; ' , overview_table_name ) ;
creation_clause : = Format ( ' INSERT INTO %s ' , overview_table_name ) ;
ELSE
2017-06-02 21:21:38 +08:00
RAISE NOTICE ' Sampling reduce stategy creating a new table overview % ' , overview_table_name ;
2017-06-02 19:06:55 +08:00
creation_clause : = Format ( ' CREATE TABLE %s AS ' , overview_table_name ) ;
END IF ;
2015-12-29 02:28:20 +08:00
-- Estimate number of rows
2015-12-29 02:41:14 +08:00
SELECT reltuples , relpages FROM pg_class INTO STRICT class_info
2015-12-29 02:28:20 +08:00
WHERE oid = reloid : : oid ;
2015-12-29 02:41:14 +08:00
IF class_info . relpages < 2 OR fraction > 0 . 5 THEN
-- We'll avoid possible CDB_RandomTids problems
EXECUTE Format ( '
2017-06-02 19:06:55 +08:00
% s SELECT * FROM % s WHERE random ( ) < % s ;
' , creation_clause, reloid, fraction);
2015-12-29 02:41:14 +08:00
ELSE
num_samples : = ceil ( class_info . reltuples * fraction ) ;
EXECUTE Format ( '
2017-06-02 19:06:55 +08:00
% 1 $ s SELECT * FROM % 2 $ s
2015-12-29 02:41:14 +08:00
WHERE ctid = ANY (
ARRAY [
( SELECT CDB_RandomTids ( ' ' % 2 $ s ' ' , % 3 $ s ) )
]
) ;
2017-06-02 19:06:55 +08:00
' , creation_clause, reloid, num_samples);
2015-12-29 02:41:14 +08:00
END IF ;
2016-01-11 22:34:10 +08:00
2017-06-02 19:06:55 +08:00
RETURN Format ( ' %s ' , overview_table_name ) : : regclass ;
2015-12-16 00:36:27 +08:00
END ;
2017-10-24 20:16:56 +08:00
$ $ LANGUAGE PLPGSQL VOLATILE PARALLEL UNSAFE ;
2015-12-16 00:36:27 +08:00
2015-12-23 21:17:50 +08:00
-- Register new overview table (post-creation chores)
-- Scope: private
-- Parameters:
-- dataset: oid of the input dataset table, It must be a cartodbfy'ed table.
-- overview_table: oid of the overview table to be registered.
-- overview_z: intended Z level for the overview table
2016-01-29 00:04:06 +08:00
-- This function is declared SECURITY DEFINER so it executes with the privileges
-- of the function creator to have a chance to alter the privileges of the
-- overview table to match those of the dataset. It will only perform any change
-- if the overview table belgons to the same scheme as the dataset and it
-- matches the scheme naming for overview tables.
2015-12-23 21:17:50 +08:00
CREATE OR REPLACE FUNCTION _CDB_Register_Overview ( dataset REGCLASS , overview_table REGCLASS , overview_z INTEGER )
RETURNS VOID
AS $ $
DECLARE
sql TEXT ;
2015-12-29 22:22:12 +08:00
table_owner TEXT ;
2016-01-29 00:04:06 +08:00
dataset_scheme TEXT ;
dataset_name TEXT ;
overview_scheme TEXT ;
overview_name TEXT ;
2015-12-23 21:17:50 +08:00
BEGIN
2016-01-29 00:04:06 +08:00
-- This function will only register a table as an overview table if it matches
-- the overviews naming scheme for the dataset and z level and the table belongs
-- to the same scheme as the the dataset
SELECT * FROM _cdb_split_table_name ( dataset ) INTO dataset_scheme , dataset_name ;
SELECT * FROM _cdb_split_table_name ( overview_table ) INTO overview_scheme , overview_name ;
IF dataset_scheme = overview_scheme AND
overview_name = _CDB_OverviewTableName ( dataset_name , overview_z ) THEN
-- preserve the owner of the base table
SELECT u . usename
2016-04-07 13:52:58 +08:00
FROM pg_catalog . pg_class c
JOIN pg_catalog . pg_user u ON ( c . relowner = u . usesysid )
JOIN pg_namespace n ON n . oid = c . relnamespace
WHERE c . relname = dataset_name : : text AND n . nspname = dataset_scheme
2016-01-29 00:04:06 +08:00
INTO table_owner ;
2016-04-07 13:52:58 +08:00
2016-01-29 00:04:06 +08:00
EXECUTE Format ( ' ALTER TABLE IF EXISTS %s OWNER TO %I; ' , overview_table : : text , table_owner ) ;
-- preserve the table privileges
UPDATE pg_class c_to
SET relacl = c_from . relacl
FROM pg_class c_from
WHERE c_from . oid = dataset
AND c_to . oid = overview_table ;
PERFORM _CDB_Add_Indexes ( overview_table ) ;
-- TODO: If metadata about existing overviews is to be stored
-- it should be done here (CDB_Overviews would consume such metadata)
END IF ;
2015-12-23 21:17:50 +08:00
END
2017-10-24 20:16:56 +08:00
$ $ LANGUAGE PLPGSQL VOLATILE PARALLEL UNSAFE SECURITY DEFINER ;
2015-12-23 21:17:50 +08:00
2015-12-23 19:17:37 +08:00
-- Dataset attributes (column names other than the
-- CartoDB primary key and geometry columns) which should be aggregated
-- in aggregated overviews.
-- Scope: private.
-- Parameters
-- reloid: oid of the input table. It must be a cartodbfy'ed table.
-- Return value: set of attribute names
CREATE OR REPLACE FUNCTION _CDB_Aggregable_Attributes ( reloid REGCLASS )
RETURNS SETOF information_schema . sql_identifier
AS $ $
2016-01-11 22:34:10 +08:00
SELECT c FROM CDB_ColumnNames ( reloid ) c , _CDB_Columns ( ) cdb
2015-12-23 19:17:37 +08:00
WHERE c NOT IN (
cdb . pkey , cdb . geomcol , cdb . mercgeomcol
)
2017-10-24 20:16:56 +08:00
$ $ LANGUAGE SQL STABLE PARALLEL SAFE ;
2015-12-23 19:17:37 +08:00
-- List of dataset attributes to be aggregated in aggregated overview
-- as a comma-separated SQL expression.
-- Scope: private.
-- Parameters
-- reloid: oid of the input table. It must be a cartodbfy'ed table.
-- Return value: SQL subexpression as text
CREATE OR REPLACE FUNCTION _CDB_Aggregable_Attributes_Expression ( reloid REGCLASS )
RETURNS TEXT
AS $ $
DECLARE
attr_list TEXT ;
BEGIN
SELECT string_agg ( s . c , ' , ' ) FROM (
SELECT * FROM _CDB_Aggregable_Attributes ( reloid ) c
) AS s INTO attr_list ;
RETURN attr_list ;
END
2017-10-24 20:16:56 +08:00
$ $ LANGUAGE PLPGSQL STABLE PARALLEL SAFE ;
2015-12-23 19:17:37 +08:00
2016-04-15 18:37:16 +08:00
-- Check if a column of a table is of an unlimited-length text type
CREATE OR REPLACE FUNCTION _cdb_unlimited_text_column ( reloid REGCLASS , col_name TEXT )
RETURNS BOOLEAN
AS $ $
SELECT EXISTS (
2016-04-19 00:50:37 +08:00
SELECT a . attname
FROM pg_class c
2016-04-25 22:30:58 +08:00
LEFT JOIN pg_attribute a ON a . attrelid = c . oid
LEFT JOIN pg_type t ON t . oid = a . atttypid
2016-04-19 00:50:37 +08:00
WHERE c . oid = reloid
AND a . attname = col_name
2016-04-25 22:30:58 +08:00
AND format_type ( a . atttypid , NULL ) IN ( ' text ' , ' character varying ' , ' character ' )
AND format_type ( a . atttypid , NULL ) = format_type ( a . atttypid , a . atttypmod )
2016-04-15 18:37:16 +08:00
) ;
2017-10-24 20:16:56 +08:00
$ $ LANGUAGE SQL STABLE PARALLEL SAFE ;
2016-04-15 18:37:16 +08:00
2016-04-26 21:15:24 +08:00
CREATE OR REPLACE FUNCTION _cdb_categorical_column ( reloid REGCLASS , col_name TEXT )
RETURNS BOOLEAN
AS $ $
DECLARE
schema_name TEXT ;
table_name TEXT ;
2016-04-27 21:06:09 +08:00
available BOOLEAN ;
2016-04-26 21:15:24 +08:00
categorical BOOLEAN ;
BEGIN
SELECT * FROM _cdb_split_table_name ( reloid ) INTO schema_name , table_name ;
2016-04-27 21:06:09 +08:00
SELECT n_distinct IS NOT NULL
FROM pg_stats
WHERE pg_stats . schemaname = schema_name
AND pg_stats . tablename = table_name
AND pg_stats . attname = col_name
INTO available ;
IF available IS NULL OR NOT available THEN
EXECUTE Format ( ' ANALYZE %s; ' , reloid ) ;
END IF ;
SELECT n_distinct > 0 AND n_distinct < = 20
2016-04-26 21:15:24 +08:00
FROM pg_stats
WHERE pg_stats . schemaname = schema_name
AND pg_stats . tablename = table_name
AND pg_stats . attname = col_name
INTO categorical ;
RETURN categorical ;
END ;
2017-10-24 20:16:56 +08:00
$ $ LANGUAGE PLPGSQL VOLATILE PARALLEL RESTRICTED ;
2016-04-26 21:15:24 +08:00
CREATE OR REPLACE FUNCTION _cdb_mode_of_array ( anyarray )
RETURNS anyelement AS
$ $
SELECT a
FROM unnest ( $ 1 ) a
GROUP BY 1
ORDER BY COUNT ( 1 ) DESC , 1
LIMIT 1 ;
$ $
2017-10-24 20:16:56 +08:00
LANGUAGE SQL IMMUTABLE PARALLEL SAFE ;
2016-04-26 21:15:24 +08:00
2016-04-29 14:46:01 +08:00
DROP AGGREGATE IF EXISTS _cdb_mode ( anyelement ) ;
2016-04-26 21:15:24 +08:00
CREATE AGGREGATE _cdb_mode ( anyelement ) (
SFUNC = array_append ,
STYPE = anyarray ,
FINALFUNC = _cdb_mode_of_array ,
2017-10-25 23:58:10 +08:00
PARALLEL = SAFE ,
INITCOND = ' {} '
2016-04-26 21:15:24 +08:00
) ;
2015-12-23 19:17:37 +08:00
-- SQL Aggregation expression for a datase attribute
-- Scope: private.
-- Parameters
-- reloid: oid of the input table. It must be a cartodbfy'ed table.
-- column_name: column to be aggregated
-- table_alias: (optional) table qualifier for the column to be aggregated
-- Return SQL subexpression as text with aggregated attribute aliased
-- with its original name.
CREATE OR REPLACE FUNCTION _CDB_Attribute_Aggregation_Expression ( reloid REGCLASS , column_name TEXT , table_alias TEXT DEFAULT ' ' )
RETURNS TEXT
AS $ $
DECLARE
column_type TEXT ;
qualified_column TEXT ;
2016-04-15 16:48:08 +08:00
has_counter_column BOOLEAN ;
feature_count TEXT ;
total_feature_count TEXT ;
2016-04-27 00:02:25 +08:00
base_table REGCLASS ;
2015-12-23 19:17:37 +08:00
BEGIN
IF table_alias < > ' ' THEN
qualified_column : = Format ( ' %I.%I ' , table_alias , column_name ) ;
ELSE
qualified_column : = Format ( ' %I ' , column_name ) ;
END IF ;
2016-01-11 22:34:10 +08:00
column_type : = CDB_ColumnType ( reloid , column_name ) ;
2015-12-23 19:17:37 +08:00
2016-04-15 16:48:08 +08:00
SELECT EXISTS (
SELECT * FROM CDB_ColumnNames ( reloid ) as colname WHERE colname = ' _feature_count '
) INTO has_counter_column ;
IF has_counter_column THEN
feature_count : = ' _feature_count ' ;
total_feature_count : = ' SUM(_feature_count) ' ;
ELSE
feature_count : = ' 1 ' ;
total_feature_count : = ' count(*) ' ;
END IF ;
2016-04-27 00:02:25 +08:00
base_table : = _CDB_OverviewBaseTable ( reloid ) ;
2015-12-23 19:17:37 +08:00
CASE column_type
2016-04-14 21:46:03 +08:00
WHEN ' double precision ' , ' real ' , ' integer ' , ' bigint ' , ' numeric ' THEN
2016-04-15 00:23:09 +08:00
IF column_name = ' _feature_count ' THEN
RETURN ' SUM(_feature_count) ' ;
2016-04-12 17:10:55 +08:00
ELSE
2016-04-27 00:02:25 +08:00
IF column_type = ' integer ' AND _cdb_categorical_column ( base_table , column_name ) THEN
2016-04-26 21:15:24 +08:00
RETURN Format ( ' CDB_Math_Mode(%s):: ' , qualified_column ) | | column_type ;
ELSE
RETURN Format ( ' SUM(%s*%s)/%s:: ' | | column_type , qualified_column , feature_count , total_feature_count ) ;
END IF ;
2016-04-12 17:10:55 +08:00
END IF ;
2016-04-15 18:37:16 +08:00
WHEN ' text ' , ' character varying ' , ' character ' THEN
2016-04-27 00:02:25 +08:00
IF _cdb_categorical_column ( base_table , column_name ) THEN
2016-04-26 21:15:24 +08:00
RETURN Format ( ' _cdb_mode(%s):: ' , qualified_column ) | | column_type ;
2016-04-15 18:37:16 +08:00
ELSE
2016-04-27 00:02:25 +08:00
IF _cdb_unlimited_text_column ( base_table , column_name ) THEN
2016-04-26 21:15:24 +08:00
-- TODO: this should not be applied to columns containing largish text;
-- it is intended only to short names/identifiers
RETURN ' CASE WHEN count(distinct ' | | qualified_column | | ' ) = 1 THEN MIN( ' | | qualified_column | | ' ) WHEN ' | | total_feature_count | | ' < 5 THEN string_agg(distinct ' | | qualified_column | | ' , '' / '' ) ELSE '' * '' END:: ' | | column_type ;
ELSE
RETURN ' CASE count(*) WHEN 1 THEN MIN( ' | | qualified_column | | ' ) ELSE NULL END:: ' | | column_type ;
END IF ;
2016-04-15 18:37:16 +08:00
END IF ;
2016-03-21 19:58:29 +08:00
WHEN ' boolean ' THEN
RETURN ' CASE count(*) WHEN 1 THEN BOOL_AND( ' | | qualified_column | | ' ) ELSE NULL END:: ' | | column_type ;
2015-12-29 21:56:47 +08:00
ELSE
RETURN ' CASE count(*) WHEN 1 THEN MIN( ' | | qualified_column | | ' ) ELSE NULL END:: ' | | column_type ;
2015-12-23 19:17:37 +08:00
END CASE ;
END
2017-10-24 20:16:56 +08:00
$ $ LANGUAGE PLPGSQL VOLATILE PARALLEL RESTRICTED ;
2015-12-23 19:17:37 +08:00
-- List of dataset aggregated attributes as a comma-separated SQL expression.
-- Scope: private.
-- Parameters
-- reloid: oid of the input table. It must be a cartodbfy'ed table.
-- table_alias: (optional) table qualifier for the columns to be aggregated
-- Return value: SQL subexpression as text
CREATE OR REPLACE FUNCTION _CDB_Aggregated_Attributes_Expression ( reloid REGCLASS , table_alias TEXT DEFAULT ' ' )
RETURNS TEXT
AS $ $
DECLARE
attr_list TEXT ;
BEGIN
SELECT string_agg ( _CDB_Attribute_Aggregation_Expression ( reloid , s . c , table_alias ) | | Format ( ' AS %s ' , s . c ) , ' , ' )
FROM (
SELECT * FROM _CDB_Aggregable_Attributes ( reloid ) c
) AS s INTO attr_list ;
RETURN attr_list ;
END
2017-10-24 20:16:56 +08:00
$ $ LANGUAGE PLPGSQL VOLATILE PARALLEL RESTRICTED ;
2015-12-23 19:17:37 +08:00
2016-01-29 23:19:35 +08:00
-- Array of geometry types detected in a cartodbfied table
-- For effciency only look at a limited number of rwos.
-- Parameters
-- reloid: oid of the input table. It must be a cartodbfy'ed table.
-- Return value: array of geometry type names
CREATE OR REPLACE FUNCTION _CDB_GeometryTypes ( reloid REGCLASS )
RETURNS TEXT [ ]
AS $ $
DECLARE
gtypes TEXT [ ] ;
BEGIN
EXECUTE Format ( '
SELECT array_agg ( DISTINCT ST_GeometryType ( the_geom ) ) FROM (
SELECT the_geom FROM % s
WHERE ( the_geom is not null ) LIMIT 10
) as geom_types
' , reloid)
INTO gtypes ;
RETURN gtypes ;
END
2017-10-24 20:16:56 +08:00
$ $ LANGUAGE PLPGSQL STABLE PARALLEL SAFE ;
2016-01-29 23:19:35 +08:00
2015-12-23 19:42:40 +08:00
-- Experimental Overview reduction method for point datasets.
-- It clusters the points using a grid, then aggregates the point in each
-- cluster into a point at the centroid of the clustered records.
2016-01-12 18:59:42 +08:00
-- Scope: private.
2015-12-23 19:42:40 +08:00
-- Parameters:
-- reloid original table (can be the base table of the dataset or an existing
-- overview) from which the overview is being generated.
-- ref_z Z level assigned to the original table
-- overview_z Z level of the overview to be generated, must be smaller than ref_z
-- Return value: Name of the generated overview table
2017-06-02 19:06:55 +08:00
CREATE OR REPLACE FUNCTION _CDB_GridCluster_Reduce_Strategy ( reloid REGCLASS , ref_z INTEGER , overview_z INTEGER , grid_px FLOAT8 DEFAULT NULL , has_overview_created BOOLEAN DEFAULT FALSE )
2015-12-17 00:45:36 +08:00
RETURNS REGCLASS
AS $ $
DECLARE
overview_rel TEXT ;
reduction FLOAT8 ;
base_name TEXT ;
2016-04-23 21:07:06 +08:00
pixel_m FLOAT8 ;
2015-12-17 00:45:36 +08:00
grid_m FLOAT8 ;
2016-04-23 21:07:06 +08:00
offset_m FLOAT8 ;
2016-04-25 19:33:43 +08:00
offset_x TEXT ;
offset_y TEXT ;
cell_x TEXT ;
cell_y TEXT ;
2015-12-17 00:45:36 +08:00
aggr_attributes TEXT ;
attributes TEXT ;
2016-01-11 22:34:10 +08:00
columns TEXT ;
2016-01-29 23:19:35 +08:00
gtypes TEXT [ ] ;
2016-04-04 23:45:20 +08:00
schema_name TEXT ;
table_name TEXT ;
2016-04-07 21:34:31 +08:00
point_geom TEXT ;
2017-06-02 19:06:55 +08:00
overview_table_name TEXT ;
creation_clause TEXT ;
2015-12-17 00:45:36 +08:00
BEGIN
2016-01-29 23:19:35 +08:00
SELECT _CDB_GeometryTypes ( reloid ) INTO gtypes ;
2016-04-13 23:49:38 +08:00
IF gtypes IS NULL OR array_upper ( gtypes , 1 ) < > 1 OR gtypes [ 1 ] < > ' ST_Point ' THEN
2016-01-29 23:19:35 +08:00
-- This strategy only supports datasets with point geomety
RETURN NULL ;
END IF ;
- - TODO : check applicability : geometry type , minimum number of points . . .
2015-12-17 00:45:36 +08:00
overview_rel : = _CDB_Overview_Name ( reloid , ref_z , overview_z ) ;
2016-04-05 01:21:10 +08:00
-- Grid size in pixels at Z level overview_z
IF grid_px IS NULL THEN
2016-04-14 18:12:17 +08:00
grid_px : = 1 . 0 ;
2016-04-05 01:21:10 +08:00
END IF ;
2016-04-04 23:45:20 +08:00
SELECT * FROM _cdb_split_table_name ( reloid ) INTO schema_name , table_name ;
2016-04-23 21:07:06 +08:00
-- pixel_m: size of a pixel in webmercator units (meters)
SELECT CDB_XYZ_Resolution ( overview_z ) INTO pixel_m ;
-- grid size in meters
grid_m = grid_px * pixel_m ;
2015-12-17 00:45:36 +08:00
2015-12-23 19:17:37 +08:00
attributes : = _CDB_Aggregable_Attributes_Expression ( reloid ) ;
aggr_attributes : = _CDB_Aggregated_Attributes_Expression ( reloid ) ;
IF attributes < > ' ' THEN
2015-12-29 20:38:07 +08:00
attributes : = ' , ' | | attributes ;
2015-12-23 19:17:37 +08:00
END IF ;
IF aggr_attributes < > ' ' THEN
aggr_attributes : = aggr_attributes | | ' , ' ;
END IF ;
2015-12-17 00:45:36 +08:00
2016-04-25 19:33:43 +08:00
-- Center of each cell:
cell_x : = Format ( ' gx*%1$s + %2$s ' , grid_m , grid_m / 2 ) ;
cell_y : = Format ( ' gy*%1$s + %2$s ' , grid_m , grid_m / 2 ) ;
2016-04-23 21:07:06 +08:00
2016-04-25 19:33:43 +08:00
-- Displacement to the nearest pixel center:
2016-04-25 19:53:59 +08:00
IF MOD ( grid_px : : numeric , 1 . 0 : : numeric ) = 0 THEN
offset_m : = pixel_m / 2 - MOD ( ( grid_m / 2 ) : : numeric , pixel_m : : numeric ) : : float8 ;
offset_x : = Format ( ' %s ' , offset_m ) ;
offset_y : = Format ( ' %s ' , offset_m ) ;
ELSE
offset_x : = Format ( ' %2$s/2 - MOD((%1$s)::numeric, (%2$s)::numeric)::float8 ' , cell_x , pixel_m ) ;
offset_y : = Format ( ' %2$s/2 - MOD((%1$s)::numeric, (%2$s)::numeric)::float8 ' , cell_y , pixel_m ) ;
END IF ;
2016-04-25 19:33:43 +08:00
point_geom : = Format ( ' ST_SetSRID(ST_MakePoint(%1$s + %3$s, %2$s + %4$s), 3857) ' , cell_x , cell_y , offset_x , offset_y ) ;
2016-04-07 21:34:31 +08:00
2016-01-11 22:34:10 +08:00
-- compute the resulting columns in the same order as in the base table
WITH cols AS (
SELECT
CASE c
WHEN ' cartodb_id ' THEN ' cartodb_id '
WHEN ' the_geom ' THEN
2016-04-07 21:34:31 +08:00
Format ( ' ST_Transform(%s, 4326) AS the_geom ' , point_geom )
2016-01-11 22:34:10 +08:00
WHEN ' the_geom_webmercator ' THEN
2016-04-07 21:34:31 +08:00
Format ( ' %s AS the_geom_webmercator ' , point_geom )
2016-01-11 22:34:10 +08:00
ELSE c
END AS column
FROM CDB_ColumnNames ( reloid ) c
)
SELECT string_agg ( s . column , ' , ' ) FROM (
SELECT * FROM cols
) AS s INTO columns ;
2016-04-15 00:23:09 +08:00
IF NOT columns LIKE ' %_feature_count% ' THEN
columns : = columns | | ' , n AS _feature_count ' ;
2016-04-27 00:27:52 +08:00
END IF ;
2016-04-12 17:10:55 +08:00
2017-06-02 19:06:55 +08:00
overview_table_name : = Format ( ' %I.%I ' , schema_name , overview_rel ) ;
IF has_overview_created THEN
2017-06-02 21:21:38 +08:00
RAISE NOTICE ' Grid cluster strategy deleting and inserting because % has overviews ' , overview_table_name ;
2017-06-02 19:06:55 +08:00
EXECUTE Format ( ' DELETE FROM %s; ' , overview_table_name ) ;
creation_clause : = Format ( ' INSERT INTO %s ' , overview_table_name ) ;
ELSE
2017-06-02 21:21:38 +08:00
RAISE NOTICE ' Grid cluster strategy creating a new table overview % ' , overview_table_name ;
2017-06-02 19:06:55 +08:00
creation_clause : = Format ( ' CREATE TABLE %s AS ' , overview_table_name ) ;
END IF ;
2015-12-23 00:59:49 +08:00
-- Now we cluster the data using a grid of size grid_m
-- and selecte the centroid (average coordinates) of each cluster.
-- If we had a selected numeric attribute of interest we could use it
-- as a weight for the average coordinates.
2015-12-17 00:45:36 +08:00
EXECUTE Format ( '
2017-06-02 19:06:55 +08:00
% 3 $ s
2015-12-17 00:45:36 +08:00
WITH clusters AS (
SELECT
2015-12-23 00:59:49 +08:00
% 5 $ s
count ( * ) AS n ,
Floor ( ST_X ( f . the_geom_webmercator ) / % 2 $ s ) : : int AS gx ,
Floor ( ST_Y ( f . the_geom_webmercator ) / % 2 $ s ) : : int AS gy ,
2016-01-04 17:33:39 +08:00
MIN ( cartodb_id ) AS cartodb_id
2015-12-23 00:59:49 +08:00
FROM % 1 $ s f
2016-07-05 22:45:12 +08:00
WHERE f . the_geom_webmercator IS NOT NULL
2015-12-23 00:59:49 +08:00
GROUP BY gx , gy
2015-12-17 00:45:36 +08:00
)
2016-01-11 22:34:10 +08:00
SELECT % 6 $ s FROM clusters
2017-06-02 19:06:55 +08:00
' , reloid::text, grid_m, creation_clause, attributes, aggr_attributes, columns);
2015-12-17 00:45:36 +08:00
2017-06-02 19:06:55 +08:00
RETURN Format ( ' %s ' , overview_table_name ) : : regclass ;
2015-12-17 00:45:36 +08:00
END ;
2017-10-24 20:16:56 +08:00
$ $ LANGUAGE PLPGSQL VOLATILE PARALLEL UNSAFE ;
2015-12-17 00:45:36 +08:00
2016-11-03 20:31:04 +08:00
-- This strategy places the aggregation of each cluster at the centroid of the cluster members.
2017-06-02 19:06:55 +08:00
CREATE OR REPLACE FUNCTION _CDB_GridClusterCentroid_Reduce_Strategy ( reloid REGCLASS , ref_z INTEGER , overview_z INTEGER , grid_px FLOAT8 DEFAULT NULL , has_overview_created BOOLEAN DEFAULT FALSE )
2016-11-03 20:31:04 +08:00
RETURNS REGCLASS
AS $ $
DECLARE
overview_rel TEXT ;
reduction FLOAT8 ;
base_name TEXT ;
pixel_m FLOAT8 ;
grid_m FLOAT8 ;
offset_m FLOAT8 ;
offset_x TEXT ;
offset_y TEXT ;
cell_x TEXT ;
cell_y TEXT ;
aggr_attributes TEXT ;
attributes TEXT ;
columns TEXT ;
gtypes TEXT [ ] ;
schema_name TEXT ;
table_name TEXT ;
point_geom TEXT ;
2017-06-02 19:06:55 +08:00
overview_table_name TEXT ;
creation_clause TEXT ;
2016-11-03 20:31:04 +08:00
BEGIN
SELECT _CDB_GeometryTypes ( reloid ) INTO gtypes ;
IF gtypes IS NULL OR array_upper ( gtypes , 1 ) < > 1 OR gtypes [ 1 ] < > ' ST_Point ' THEN
-- This strategy only supports datasets with point geomety
RETURN NULL ;
END IF ;
- - TODO : check applicability : geometry type , minimum number of points . . .
overview_rel : = _CDB_Overview_Name ( reloid , ref_z , overview_z ) ;
-- Grid size in pixels at Z level overview_z
IF grid_px IS NULL THEN
grid_px : = 1 . 0 ;
END IF ;
SELECT * FROM _cdb_split_table_name ( reloid ) INTO schema_name , table_name ;
-- pixel_m: size of a pixel in webmercator units (meters)
SELECT CDB_XYZ_Resolution ( overview_z ) INTO pixel_m ;
-- grid size in meters
grid_m = grid_px * pixel_m ;
attributes : = _CDB_Aggregable_Attributes_Expression ( reloid ) ;
aggr_attributes : = _CDB_Aggregated_Attributes_Expression ( reloid ) ;
IF attributes < > ' ' THEN
attributes : = ' , ' | | attributes ;
END IF ;
IF aggr_attributes < > ' ' THEN
aggr_attributes : = aggr_attributes | | ' , ' ;
END IF ;
-- Center of each cell:
cell_x : = Format ( ' gx*%1$s + %2$s ' , grid_m , grid_m / 2 ) ;
cell_y : = Format ( ' gy*%1$s + %2$s ' , grid_m , grid_m / 2 ) ;
-- Displacement to the nearest pixel center:
IF MOD ( grid_px : : numeric , 1 . 0 : : numeric ) = 0 THEN
offset_m : = pixel_m / 2 - MOD ( ( grid_m / 2 ) : : numeric , pixel_m : : numeric ) : : float8 ;
offset_x : = Format ( ' %s ' , offset_m ) ;
offset_y : = Format ( ' %s ' , offset_m ) ;
ELSE
offset_x : = Format ( ' %2$s/2 - MOD((%1$s)::numeric, (%2$s)::numeric)::float8 ' , cell_x , pixel_m ) ;
offset_y : = Format ( ' %2$s/2 - MOD((%1$s)::numeric, (%2$s)::numeric)::float8 ' , cell_y , pixel_m ) ;
END IF ;
point_geom : = Format ( ' ST_SetSRID(ST_MakePoint(%1$s + %3$s, %2$s + %4$s), 3857) ' , cell_x , cell_y , offset_x , offset_y ) ;
-- compute the resulting columns in the same order as in the base table
WITH cols AS (
SELECT
CASE c
WHEN ' cartodb_id ' THEN ' cartodb_id '
WHEN ' the_geom ' THEN
' ST_Transform(ST_SetSRID(ST_MakePoint(_sum_of_x/n, _sum_of_y/n), 3857), 4326) AS the_geom '
WHEN ' the_geom_webmercator ' THEN
' ST_SetSRID(ST_MakePoint(_sum_of_x/n, _sum_of_y/n), 3857) AS the_geom_webmercator '
ELSE c
END AS column
FROM CDB_ColumnNames ( reloid ) c
)
SELECT string_agg ( s . column , ' , ' ) FROM (
SELECT * FROM cols
) AS s INTO columns ;
IF NOT columns LIKE ' %_feature_count% ' THEN
columns : = columns | | ' , n AS _feature_count ' ;
END IF ;
2017-06-02 19:06:55 +08:00
overview_table_name : = Format ( ' %I.%I ' , schema_name , overview_rel ) ;
IF has_overview_created THEN
2017-06-02 21:21:38 +08:00
RAISE NOTICE ' Grid cluster centroid strategy deleting and inserting because % has overviews ' , overview_table_name ;
2017-06-02 19:06:55 +08:00
EXECUTE Format ( ' DELETE FROM %s; ' , overview_table_name ) ;
creation_clause : = Format ( ' INSERT INTO %s ' , overview_table_name ) ;
ELSE
2017-06-02 21:21:38 +08:00
RAISE NOTICE ' Grid cluster centroid strategy creating a new table overview % ' , overview_table_name ;
2017-06-02 19:06:55 +08:00
creation_clause : = Format ( ' CREATE TABLE %s AS ' , overview_table_name ) ;
END IF ;
2016-11-03 20:31:04 +08:00
-- Now we cluster the data using a grid of size grid_m
-- and selecte the centroid (average coordinates) of each cluster.
-- If we had a selected numeric attribute of interest we could use it
-- as a weight for the average coordinates.
EXECUTE Format ( '
2017-06-02 19:06:55 +08:00
% 3 $ s
2016-11-03 20:31:04 +08:00
WITH clusters AS (
SELECT
% 5 $ s
count ( * ) AS n ,
SUM ( ST_X ( f . the_geom_webmercator ) ) AS _sum_of_x ,
SUM ( ST_Y ( f . the_geom_webmercator ) ) AS _sum_of_y ,
Floor ( ST_Y ( f . the_geom_webmercator ) / % 2 $ s ) : : int AS gy ,
Floor ( ST_X ( f . the_geom_webmercator ) / % 2 $ s ) : : int AS gx ,
MIN ( cartodb_id ) AS cartodb_id
FROM % 1 $ s f
GROUP BY gx , gy
)
SELECT % 6 $ s FROM clusters
2017-06-02 19:06:55 +08:00
' , reloid::text, grid_m, creation_clause, attributes, aggr_attributes, columns);
2016-11-03 20:31:04 +08:00
2017-06-02 19:06:55 +08:00
RETURN Format ( ' %s ' , overview_table_name ) : : regclass ;
2016-11-03 20:31:04 +08:00
END ;
2017-10-24 20:16:56 +08:00
$ $ LANGUAGE PLPGSQL VOLATILE PARALLEL UNSAFE ;
2016-11-03 20:31:04 +08:00
-- This strategy places the aggregation of each cluster at the position of one of the cluster members.
2017-06-02 19:06:55 +08:00
CREATE OR REPLACE FUNCTION _CDB_GridClusterSample_Reduce_Strategy ( reloid REGCLASS , ref_z INTEGER , overview_z INTEGER , grid_px FLOAT8 DEFAULT NULL , has_overview_created BOOLEAN DEFAULT FALSE )
2016-11-03 20:31:04 +08:00
RETURNS REGCLASS
AS $ $
DECLARE
overview_rel TEXT ;
reduction FLOAT8 ;
base_name TEXT ;
pixel_m FLOAT8 ;
grid_m FLOAT8 ;
offset_m FLOAT8 ;
offset_x TEXT ;
offset_y TEXT ;
cell_x TEXT ;
cell_y TEXT ;
aggr_attributes TEXT ;
attributes TEXT ;
columns TEXT ;
gtypes TEXT [ ] ;
schema_name TEXT ;
table_name TEXT ;
point_geom TEXT ;
2017-06-02 19:06:55 +08:00
overview_table_name TEXT ;
creation_clause TEXT ;
2016-11-03 20:31:04 +08:00
BEGIN
SELECT _CDB_GeometryTypes ( reloid ) INTO gtypes ;
IF gtypes IS NULL OR array_upper ( gtypes , 1 ) < > 1 OR gtypes [ 1 ] < > ' ST_Point ' THEN
-- This strategy only supports datasets with point geomety
RETURN NULL ;
END IF ;
- - TODO : check applicability : geometry type , minimum number of points . . .
overview_rel : = _CDB_Overview_Name ( reloid , ref_z , overview_z ) ;
-- Grid size in pixels at Z level overview_z
IF grid_px IS NULL THEN
grid_px : = 1 . 0 ;
END IF ;
SELECT * FROM _cdb_split_table_name ( reloid ) INTO schema_name , table_name ;
-- pixel_m: size of a pixel in webmercator units (meters)
SELECT CDB_XYZ_Resolution ( overview_z ) INTO pixel_m ;
-- grid size in meters
grid_m = grid_px * pixel_m ;
attributes : = _CDB_Aggregable_Attributes_Expression ( reloid ) ;
aggr_attributes : = _CDB_Aggregated_Attributes_Expression ( reloid ) ;
IF attributes < > ' ' THEN
attributes : = ' , ' | | attributes ;
END IF ;
IF aggr_attributes < > ' ' THEN
aggr_attributes : = aggr_attributes | | ' , ' ;
END IF ;
-- Center of each cell:
cell_x : = Format ( ' gx*%1$s + %2$s ' , grid_m , grid_m / 2 ) ;
cell_y : = Format ( ' gy*%1$s + %2$s ' , grid_m , grid_m / 2 ) ;
-- Displacement to the nearest pixel center:
IF MOD ( grid_px : : numeric , 1 . 0 : : numeric ) = 0 THEN
offset_m : = pixel_m / 2 - MOD ( ( grid_m / 2 ) : : numeric , pixel_m : : numeric ) : : float8 ;
offset_x : = Format ( ' %s ' , offset_m ) ;
offset_y : = Format ( ' %s ' , offset_m ) ;
ELSE
offset_x : = Format ( ' %2$s/2 - MOD((%1$s)::numeric, (%2$s)::numeric)::float8 ' , cell_x , pixel_m ) ;
offset_y : = Format ( ' %2$s/2 - MOD((%1$s)::numeric, (%2$s)::numeric)::float8 ' , cell_y , pixel_m ) ;
END IF ;
point_geom : = Format ( ' ST_SetSRID(ST_MakePoint(%1$s + %3$s, %2$s + %4$s), 3857) ' , cell_x , cell_y , offset_x , offset_y ) ;
-- compute the resulting columns in the same order as in the base table
WITH cols AS (
SELECT
CASE c
WHEN ' cartodb_id ' THEN ' cartodb_id '
ELSE c
END AS column
FROM CDB_ColumnNames ( reloid ) c
)
SELECT string_agg ( s . column , ' , ' ) FROM (
SELECT * FROM cols
) AS s INTO columns ;
IF NOT columns LIKE ' %_feature_count% ' THEN
columns : = columns | | ' , n AS _feature_count ' ;
END IF ;
2017-06-02 19:06:55 +08:00
overview_table_name : = Format ( ' %I.%I ' , schema_name , overview_rel ) ;
IF has_overview_created THEN
2017-06-02 21:21:38 +08:00
RAISE NOTICE ' Grid cluster sampling strategy deleting and inserting because % has overviews ' , overview_table_name ;
2017-06-02 19:06:55 +08:00
EXECUTE Format ( ' DELETE FROM %s; ' , overview_table_name ) ;
creation_clause : = Format ( ' INSERT INTO %s ' , overview_table_name ) ;
ELSE
2017-06-02 21:21:38 +08:00
RAISE NOTICE ' Grid cluster sampling strategy creating a new table overview % ' , overview_table_name ;
2017-06-02 19:06:55 +08:00
creation_clause : = Format ( ' CREATE TABLE %s AS ' , overview_table_name ) ;
END IF ;
2016-11-03 20:31:04 +08:00
-- Now we cluster the data using a grid of size grid_m
-- and select the centroid (average coordinates) of each cluster.
-- If we had a selected numeric attribute of interest we could use it
-- as a weight for the average coordinates.
EXECUTE Format ( '
2017-06-02 19:06:55 +08:00
% 3 $ s
2016-11-03 20:31:04 +08:00
WITH clusters AS (
SELECT
% 5 $ s
count ( * ) AS n ,
2016-11-29 21:08:08 +08:00
Floor ( ST_X ( _f . the_geom_webmercator ) / % 2 $ s ) : : int AS gx ,
Floor ( ST_Y ( _f . the_geom_webmercator ) / % 2 $ s ) : : int AS gy ,
2016-11-03 20:31:04 +08:00
MIN ( cartodb_id ) AS cartodb_id
2016-11-29 21:08:08 +08:00
FROM % 1 $ s _f
2016-11-03 20:31:04 +08:00
GROUP BY gx , gy
2016-11-29 21:08:08 +08:00
) ,
cluster_geom AS (
SELECT the_geom , the_geom_webmercator , clusters . *
FROM clusters INNER JOIN % 1 $ s _g ON ( clusters . cartodb_id = _g . cartodb_id )
2016-11-03 20:31:04 +08:00
)
2016-11-29 21:08:08 +08:00
SELECT % 6 $ s FROM cluster_geom
2017-06-02 19:06:55 +08:00
' , reloid::text, grid_m, creation_clause, attributes, aggr_attributes, columns);
2016-11-03 20:31:04 +08:00
2017-06-02 19:06:55 +08:00
RETURN Format ( ' %s ' , overview_table_name ) : : regclass ;
2016-11-03 20:31:04 +08:00
END ;
2017-10-24 20:16:56 +08:00
$ $ LANGUAGE PLPGSQL VOLATILE PARALLEL UNSAFE ;
2016-11-03 20:31:04 +08:00
2015-12-23 19:42:40 +08:00
-- Create overview tables for a dataset.
-- Scope: public
-- Parameters:
-- reloid: oid of the input table. It must be a cartodbfy'ed table with
-- vector features.
-- refscale_strategy: function that computes the reference Z of the dataset
-- reduce_strategy: function that generates overviews from a base table
2016-01-11 22:34:10 +08:00
-- or higher level overview. The overview tables
-- created by the strategy must have the same columns
-- as the base table and in the same order.
2015-12-23 19:42:40 +08:00
-- Return value: Array with the names of the generated overview tables
2017-06-02 19:06:55 +08:00
CREATE OR REPLACE FUNCTION CDB_CreateOverviews ( reloid REGCLASS , refscale_strategy regproc DEFAULT ' _CDB_Feature_Density_Ref_Z_Strategy(REGCLASS,FLOAT8) ' : : regprocedure , reduce_strategy regproc DEFAULT ' _CDB_GridCluster_Reduce_Strategy(REGCLASS,INTEGER,INTEGER,FLOAT8,BOOLEAN) ' : : regprocedure )
2015-12-16 00:36:27 +08:00
RETURNS text [ ]
AS $ $
2016-04-05 01:21:10 +08:00
DECLARE
tolerance_px FLOAT8 ;
BEGIN
-- Use the default tolerance
2016-04-14 17:13:59 +08:00
tolerance_px : = 1 . 0 ;
2016-04-05 01:21:10 +08:00
RETURN CDB_CreateOverviewsWithToleranceInPixels ( reloid , tolerance_px , refscale_strategy , reduce_strategy ) ;
END ;
2017-10-24 20:16:56 +08:00
$ $ LANGUAGE PLPGSQL VOLATILE PARALLEL UNSAFE ;
2016-04-05 01:21:10 +08:00
-- Create overviews with additional parameter to define the desired detail/tolerance in pixels
2017-06-02 19:06:55 +08:00
CREATE OR REPLACE FUNCTION CDB_CreateOverviewsWithToleranceInPixels ( reloid REGCLASS , tolerance_px FLOAT8 , refscale_strategy regproc DEFAULT ' _CDB_Feature_Density_Ref_Z_Strategy(REGCLASS,FLOAT8) ' : : regprocedure , reduce_strategy regproc DEFAULT ' _CDB_GridCluster_Reduce_Strategy(REGCLASS,INTEGER,INTEGER,FLOAT8,BOOLEAN) ' : : regprocedure )
2016-04-05 01:21:10 +08:00
RETURNS text [ ]
AS $ $
2015-12-16 00:36:27 +08:00
DECLARE
ref_z integer ;
overviews_z integer [ ] ;
base_z integer ;
base_rel REGCLASS ;
overview_z integer ;
overview_tables REGCLASS [ ] ;
2015-12-29 21:56:04 +08:00
overviews_step integer : = 1 ;
2016-04-14 23:32:18 +08:00
has_counter_column boolean ;
2017-06-02 19:06:55 +08:00
has_overviews_for_z boolean ;
2015-12-16 00:36:27 +08:00
BEGIN
-- Determine the referece zoom level
2016-04-05 01:21:10 +08:00
EXECUTE ' SELECT ' | | quote_ident ( refscale_strategy : : text ) | | Format ( ' ( '' %s '' , %s); ' , reloid , tolerance_px ) INTO ref_z ;
2015-12-16 00:36:27 +08:00
2016-04-13 23:49:38 +08:00
IF ref_z < 0 OR ref_z IS NULL THEN
RETURN NULL ;
END IF ;
2015-12-16 00:36:27 +08:00
-- Determine overlay zoom levels
2016-01-29 23:19:35 +08:00
-- TODO: should be handled by the refscale_strategy?
2015-12-16 00:36:27 +08:00
overview_z : = ref_z - 1 ;
WHILE overview_z > = 0 LOOP
SELECT array_append ( overviews_z , overview_z ) INTO overviews_z ;
2015-12-29 21:56:04 +08:00
overview_z : = overview_z - overviews_step ;
2015-12-16 00:36:27 +08:00
END LOOP ;
2017-06-02 21:21:38 +08:00
-- TODO Check for non-used overview to delete them but we have to be aware that the
-- current queries, for example from a tiler, are been used with the old overviews
-- so if we remove the overviews in the process this could lead to errors
2017-06-02 19:06:55 +08:00
2017-06-02 21:21:38 +08:00
-- Create or reganerate overlay tables
2015-12-16 00:36:27 +08:00
base_z : = ref_z ;
base_rel : = reloid ;
FOREACH overview_z IN ARRAY overviews_z LOOP
2017-06-02 19:06:55 +08:00
SELECT CASE WHEN count ( * ) > 0 THEN TRUE ELSE FALSE END from CDB_Overviews ( reloid ) WHERE z = overview_z INTO has_overviews_for_z ;
EXECUTE ' SELECT ' | | quote_ident ( reduce_strategy : : text ) | | Format ( ' ( '' %s '' , %s, %s, %s, '' %s '' ); ' , base_rel , base_z , overview_z , tolerance_px , has_overviews_for_z ) INTO base_rel ;
2016-01-29 23:19:35 +08:00
IF base_rel IS NULL THEN
EXIT ;
END IF ;
2015-12-16 02:13:39 +08:00
base_z : = overview_z ;
2017-06-02 19:06:55 +08:00
IF NOT has_overviews_for_z THEN
2017-06-02 21:21:38 +08:00
RAISE NOTICE ' Registering overview: % ' , base_rel ;
2017-06-02 19:06:55 +08:00
PERFORM _CDB_Register_Overview ( reloid , base_rel , base_z ) ;
END IF ;
2015-12-16 00:36:27 +08:00
SELECT array_append ( overview_tables , base_rel ) INTO overview_tables ;
END LOOP ;
2016-04-14 23:32:18 +08:00
IF overview_tables IS NOT NULL AND array_length ( overview_tables , 1 ) > 0 THEN
SELECT EXISTS (
2016-04-15 00:23:09 +08:00
SELECT * FROM CDB_ColumnNames ( reloid ) as colname WHERE colname = ' _feature_count '
2016-04-14 23:32:18 +08:00
) INTO has_counter_column ;
IF NOT has_counter_column THEN
EXECUTE Format ( '
2016-04-15 00:23:09 +08:00
ALTER TABLE % s ADD COLUMN _feature_count integer DEFAULT 1 ;
2016-04-14 23:32:18 +08:00
' , reloid);
END IF ;
END IF ;
2015-12-16 00:36:27 +08:00
RETURN overview_tables ;
END ;
2017-10-24 20:16:56 +08:00
$ $ LANGUAGE PLPGSQL VOLATILE PARALLEL UNSAFE ;
2016-04-05 19:10:00 +08:00
2017-06-02 19:06:55 +08:00
-- Here are some older signatures of these functions, no longer in use.
2016-04-05 19:22:07 +08:00
-- They must be droped here, after the (new) definition of the function `CDB_CreateOverviews`
-- because that function used to contain references to them in the default argument values.
2016-04-05 19:10:00 +08:00
DROP FUNCTION IF EXISTS _CDB_Feature_Density_Ref_Z_Strategy ( REGCLASS ) ;
DROP FUNCTION IF EXISTS _CDB_GridCluster_Reduce_Strategy ( REGCLASS , INTEGER , INTEGER ) ;
2017-06-02 19:06:55 +08:00
DROP FUNCTION IF EXISTS _CDB_GridCluster_Reduce_Strategy ( REGCLASS , INTEGER , INTEGER , FLOAT8 ) ;
DROP FUNCTION IF EXISTS _CDB_GridClusterCentroid_Reduce_Strategy ( REGCLASS , INTEGER , INTEGER , FLOAT8 ) ;
DROP FUNCTION IF EXISTS _CDB_GridClusterSample_Reduce_Strategy ( REGCLASS , INTEGER , INTEGER , FLOAT8 ) ;
2016-04-05 19:10:00 +08:00
DROP FUNCTION IF EXISTS _CDB_Sampling_Reduce_Strategy ( REGCLASS , INTEGER , INTEGER ) ;
2017-06-02 19:06:55 +08:00
DROP FUNCTION IF EXISTS _CDB_Sampling_Reduce_Strategy ( REGCLASS , INTEGER , INTEGER , FLOAT8 ) ;