diff --git a/Makefile b/Makefile index 70a5b84..750c891 100644 --- a/Makefile +++ b/Makefile @@ -4,10 +4,7 @@ EXTENSION = cartodb EXTVERSION = 0.1 CDBSCRIPTS = \ - scripts-available/CDB_TableMetadata.sql \ - scripts-available/CDB_Quota.sql \ - scripts-available/CDB_TransformToWebmercator.sql \ - scripts-available/CDB_CartodbfyTable.sql \ + scripts-enabled/*.sql \ $(END) DATA_built = $(EXTENSION)--$(EXTVERSION).sql diff --git a/scripts-available/CDB_CartodbfyTable.sql b/scripts-available/CDB_CartodbfyTable.sql new file mode 100644 index 0000000..af9a8e5 --- /dev/null +++ b/scripts-available/CDB_CartodbfyTable.sql @@ -0,0 +1,362 @@ +-- Depends on: +-- * CDB_TransformToWebmercator.sql +-- * CDB_TableMetadata.sql +-- * CDB_Quota.sql +-- * _CDB_UserQuotaInBytes() function, installed by rails +-- (user.rebuild_quota_trigger, called by rake task +-- cartodb:db:update_test_quota_trigger) + +-- Update the_geom_webmercator +CREATE OR REPLACE FUNCTION _CDB_update_the_geom_webmercator() +RETURNS trigger AS $$ +BEGIN + NEW.the_geom_webmercator := CDB_TransformToWebmercator(NEW.the_geom); + RETURN NEW; +END; +$$ LANGUAGE plpgsql VOLATILE; + +CREATE OR REPLACE FUNCTION _CDB_update_updated_at() +RETURNS TRIGGER AS $$ +BEGIN + NEW.updated_at := now(); + RETURN NEW; +END; +$$ LANGUAGE plpgsql VOLATILE; + +-- Ensure a table is a "cartodb" table +-- See https://github.com/CartoDB/cartodb/wiki/CartoDB-user-table +CREATE OR REPLACE FUNCTION CDB_CartodbfyTable(reloid REGCLASS) +RETURNS void +AS $$ +DECLARE + sql TEXT; + rec RECORD; + rec2 RECORD; + tabinfo RECORD; + had_column BOOLEAN; + i INTEGER; + new_name TEXT; + quota_in_bytes INT8; + exists_geom_cols BOOLEAN[]; +BEGIN + + -- Drop cartodb triggers (might prevent changing columns) + + -- "track_updates" + sql := 'DROP TRIGGER IF EXISTS track_updates ON ' || reloid::text; + EXECUTE sql; + + -- "update_the_geom_webmercator" + sql := 'DROP TRIGGER IF EXISTS update_the_geom_webmercator_trigger ON ' || reloid::text; + EXECUTE sql; + + -- "update_updated_at" + sql := 'DROP TRIGGER IF EXISTS update_updated_at_trigger ON ' || reloid::text; + EXECUTE sql; + + -- "test_quota" and "test_quota_per_row" + sql := 'DROP TRIGGER IF EXISTS test_quota ON ' || reloid::text; + EXECUTE sql; + sql := 'DROP TRIGGER IF EXISTS test_quota_per_row ON ' || reloid::text; + EXECUTE sql; + + -- Ensure required fields exist + + -- We need a cartodb_id column + << cartodb_id_setup >> + LOOP --{ + had_column := FALSE; + BEGIN + sql := 'ALTER TABLE ' || reloid::text || ' ADD cartodb_id SERIAL NOT NULL UNIQUE'; + RAISE DEBUG 'Running %', sql; + EXECUTE sql; + EXIT cartodb_id_setup; + EXCEPTION + WHEN duplicate_column THEN + RAISE NOTICE 'Column cartodb_id already exists'; + had_column := TRUE; + WHEN others THEN + RAISE EXCEPTION 'Got % (%)', SQLERRM, SQLSTATE; + END; + + IF had_column THEN + + -- Check data type is an integer + SELECT t.typname, t.oid, a.attnotnull FROM pg_type t, pg_attribute a + WHERE a.atttypid = t.oid AND a.attrelid = reloid AND NOT a.attisdropped + AND a.attname = 'cartodb_id' + INTO STRICT rec; + IF rec.oid NOT IN (20,21,23) THEN -- int2, int4, int8 { + RAISE NOTICE 'Existing cartodb_id field is of invalid type % (need int2, int4 or int8), renaming', rec.typname; + ELSE -- }{ + sql := 'ALTER TABLE ' || reloid::text || ' ALTER COLUMN cartodb_id SET NOT NULL'; + IF NOT EXISTS ( SELECT c.conname FROM pg_constraint c, pg_attribute a + WHERE c.conkey = ARRAY[a.attnum] AND c.conrelid = reloid + AND a.attrelid = reloid + AND NOT a.attisdropped + AND a.attname = 'cartodb_id' + AND c.contype = 'u' ) -- unique + THEN + sql := sql || ', ADD unique(cartodb_id)'; + END IF; + BEGIN + RAISE DEBUG 'Running %', sql; + EXECUTE sql; + EXIT cartodb_id_setup; + EXCEPTION + WHEN unique_violation OR not_null_violation THEN + RAISE NOTICE '%, renaming', SQLERRM; + WHEN others THEN + RAISE EXCEPTION 'Got % (%)', SQLERRM, SQLSTATE; + END; + END IF; -- } + + + -- invalid column, need rename and re-create it + i := 0; + << rename_column >> + LOOP --{ + new_name := '_cartodb_id' || i; + BEGIN + sql := 'ALTER TABLE ' || reloid::text || ' RENAME COLUMN cartodb_id TO ' || new_name; + RAISE DEBUG 'Running %', sql; + EXECUTE sql; + EXCEPTION + WHEN duplicate_column THEN + i := i+1; + CONTINUE rename_column; + WHEN others THEN + RAISE EXCEPTION 'Got % (%)', SQLERRM, SQLSTATE; + END; + EXIT rename_column; + END LOOP; --} + CONTINUE cartodb_id_setup; + + END IF; + END LOOP; -- } + + -- We need created_at and updated_at + FOR rec IN SELECT * FROM ( VALUES ('created_at'), ('updated_at') ) t(cname) LOOP --{ + << column_setup >> + LOOP --{ + had_column := FALSE; + BEGIN + sql := 'ALTER TABLE ' || reloid::text || ' ADD ' || rec.cname + || ' TIMESTAMPTZ NOT NULL DEFAULT now()'; + RAISE DEBUG 'Running %', sql; + EXECUTE sql; + EXIT column_setup; + EXCEPTION + WHEN duplicate_column THEN + RAISE NOTICE 'Column % already exists', rec.cname; + had_column := TRUE; + WHEN others THEN + RAISE EXCEPTION 'Got % (%)', SQLERRM, SQLSTATE; + END; + + IF had_column THEN + + -- Check data type is a TIMESTAMP WITH TIMEZONE + SELECT t.typname, t.oid, a.attnotnull FROM pg_type t, pg_attribute a + WHERE a.atttypid = t.oid AND a.attrelid = reloid AND NOT a.attisdropped + AND a.attname = rec.cname + INTO STRICT rec2; + IF rec2.oid NOT IN (1184) THEN -- timestamptz { + RAISE NOTICE 'Existing % field is of invalid type % (need timestamptz), renaming', rec.cname, rec2.typname; + ELSE -- }{ + sql := 'ALTER TABLE ' || reloid::text || ' ALTER ' || rec.cname + || ' SET NOT NULL, ALTER ' || rec.cname || ' SET DEFAULT now()'; + BEGIN + RAISE DEBUG 'Running %', sql; + EXECUTE sql; + EXIT column_setup; + EXCEPTION + WHEN not_null_violation THEN + RAISE NOTICE '%, renaming', SQLERRM; + WHEN others THEN + RAISE EXCEPTION 'Got % (%)', SQLERRM, SQLSTATE; + END; + END IF; -- } + + -- invalid column, need rename and re-create it + i := 0; + << rename_column >> + LOOP --{ + new_name := '_' || rec.cname || i; + BEGIN + sql := 'ALTER TABLE ' || reloid::text || ' RENAME COLUMN ' || rec.cname || ' TO ' || new_name; + RAISE DEBUG 'Running %', sql; + EXECUTE sql; + EXCEPTION + WHEN duplicate_column THEN + i := i+1; + CONTINUE rename_column; + WHEN others THEN + RAISE EXCEPTION 'Got % (%)', SQLERRM, SQLSTATE; + END; + EXIT rename_column; + END LOOP; --} + CONTINUE column_setup; + + END IF; + END LOOP; -- } + + END LOOP; -- } + + -- We need the_geom and the_geom_webmercator + FOR rec IN SELECT * FROM ( VALUES ('the_geom',4326), ('the_geom_webmercator',3857) ) t(cname,csrid) LOOP --{ + << column_setup >> LOOP --{ + BEGIN + sql := 'ALTER TABLE ' || reloid::text || ' ADD ' || rec.cname + || ' GEOMETRY(geometry,' || rec.csrid || ')'; + RAISE DEBUG 'Running %', sql; + EXECUTE sql; + sql := 'CREATE INDEX ON ' || reloid::text || ' USING GIST ( ' || rec.cname || ')'; + RAISE DEBUG 'Running %', sql; + EXECUTE sql; + exists_geom_cols := array_append(exists_geom_cols, false); + EXIT column_setup; + EXCEPTION + WHEN duplicate_column THEN + exists_geom_cols := array_append(exists_geom_cols, true); + RAISE NOTICE 'Column % already exists', rec.cname; + WHEN others THEN + RAISE EXCEPTION 'Got % (%)', SQLERRM, SQLSTATE; + END; + + << column_fixup >> + LOOP --{ + + -- Check data type is a GEOMETRY + SELECT t.typname, t.oid, a.attnotnull, + postgis_typmod_srid(a.atttypmod) as srid, + postgis_typmod_type(a.atttypmod) as gtype + FROM pg_type t, pg_attribute a + WHERE a.atttypid = t.oid AND a.attrelid = reloid AND NOT a.attisdropped + AND a.attname = rec.cname + INTO STRICT rec2; + + IF rec2.typname NOT IN ('geometry') THEN -- { + RAISE NOTICE 'Existing % field is of invalid type % (need geometry), renaming', rec.cname, rec2.typname; + EXIT column_fixup; -- cannot fix + END IF; -- } + + IF rec2.srid != rec.csrid THEN -- { + BEGIN + sql := 'ALTER TABLE ' || reloid::text || ' ALTER ' || rec.cname + || ' TYPE geometry(' || rec2.gtype || ',' || rec.csrid || ') USING ST_Transform(' + || rec.cname || ',' || rec.csrid || ')'; + RAISE DEBUG 'Running %', sql; + EXECUTE sql; + EXCEPTION + WHEN others THEN + RAISE NOTICE 'Could not enforce SRID % to column %: %, renaming', rec.csrid, rec.cname, SQLERRM; + EXIT column_fixup; -- cannot fix, will rename + END; + END IF; -- } + + -- add gist indices if not there already + IF NOT EXISTS ( SELECT ir.relname + FROM pg_am am, pg_class ir, + pg_class c, pg_index i, + pg_attribute a + WHERE c.oid = reloid AND i.indrelid = c.oid + AND a.attname = rec.cname + AND i.indexrelid = ir.oid AND i.indnatts = 1 + AND i.indkey[0] = a.attnum AND a.attrelid = c.oid + AND NOT a.attisdropped AND am.oid = ir.relam + AND am.amname = 'gist' ) + THEN -- { + BEGIN + sql := 'CREATE INDEX ON ' || reloid::text || ' USING GIST ( ' || rec.cname || ')'; + RAISE DEBUG 'Running %', sql; + EXECUTE sql; + EXCEPTION + WHEN others THEN + RAISE EXCEPTION 'Got % (%)', SQLERRM, SQLSTATE; + END; + END IF; -- } + + -- if we reached this line, all went good + EXIT column_setup; + + END LOOP; -- } column_fixup + + -- invalid column, need rename and re-create it + i := 0; + << rename_column >> + LOOP --{ + new_name := '_' || rec.cname || i; + BEGIN + sql := 'ALTER TABLE ' || reloid::text || ' RENAME COLUMN ' || rec.cname || ' TO ' || new_name; + RAISE DEBUG 'Running %', sql; + EXECUTE sql; + EXCEPTION + WHEN duplicate_column THEN + i := i+1; + CONTINUE rename_column; + WHEN others THEN + RAISE EXCEPTION 'Got % (%)', SQLERRM, SQLSTATE; + END; + EXIT rename_column; + END LOOP; --} + CONTINUE column_setup; + + END LOOP; -- } column_setup + + END LOOP; -- } on expected geometry columns + + -- Initialize the_geom with values from the_geom_webmercator + -- do this only if the_geom_webmercator was found (not created) + -- _and_ the_geom was NOT found. + IF exists_geom_cols[2] AND NOT exists_geom_cols[1] THEN + sql := 'UPDATE ' || reloid::text || ' SET the_geom = ST_Transform(the_geom_webmercator, 4326) '; + EXECUTE sql; + END IF; + + -- Initialize the_geom_webmercator with values from the_geom + -- do this only if the_geom was found (not created) + -- _and_ the_geom_webmercator was NOT found. + IF exists_geom_cols[1] AND NOT exists_geom_cols[2] THEN + sql := 'UPDATE ' || reloid::text || ' SET the_geom_webmercator = CDB_TransformToWebmercator(the_geom) '; + EXECUTE sql; + END IF; + + -- Re-create all triggers + + -- NOTE: drop/create has the side-effect of re-enabling disabled triggers + + -- "track_updates" + sql := 'CREATE trigger track_updates AFTER INSERT OR UPDATE OR DELETE OR TRUNCATE ON ' + || reloid::text + || ' FOR EACH STATEMENT EXECUTE PROCEDURE public.cdb_tablemetadata_trigger()'; + EXECUTE sql; + + -- "update_the_geom_webmercator" + -- TODO: why _before_ and not after ? + sql := 'CREATE trigger update_the_geom_webmercator_trigger BEFORE INSERT OR UPDATE OF the_geom ON ' + || reloid::text + || ' FOR EACH ROW EXECUTE PROCEDURE public._CDB_update_the_geom_webmercator()'; + EXECUTE sql; + + -- "update_updated_at" + -- TODO: why _before_ and not after ? + sql := 'CREATE trigger update_updated_at_trigger BEFORE UPDATE ON ' + || reloid::text + || ' FOR EACH ROW EXECUTE PROCEDURE public._CDB_update_updated_at()'; + EXECUTE sql; + + -- "test_quota" and "test_quota_per_row" + + sql := 'CREATE TRIGGER test_quota BEFORE UPDATE OR INSERT ON ' + || reloid::text + || ' EXECUTE PROCEDURE public.CDB_CheckQuota(1)'; + EXECUTE sql; + + sql := 'CREATE TRIGGER test_quota_per_row BEFORE UPDATE OR INSERT ON ' + || reloid::text + || ' FOR EACH ROW EXECUTE PROCEDURE public.CDB_CheckQuota(0.001)'; + EXECUTE sql; + +END; +$$ LANGUAGE PLPGSQL; diff --git a/scripts-available/CDB_ColumnNames.sql b/scripts-available/CDB_ColumnNames.sql new file mode 100644 index 0000000..2c74dbc --- /dev/null +++ b/scripts-available/CDB_ColumnNames.sql @@ -0,0 +1,16 @@ +-- Function returning the column names of a table +CREATE OR REPLACE FUNCTION CDB_ColumnNames(REGCLASS) +RETURNS SETOF information_schema.sql_identifier +AS $$ + + SELECT column_name + FROM information_schema.columns + WHERE + table_name IN (SELECT CDB_UserTables()) + AND table_name = '' || $1 || ''; + +$$ LANGUAGE SQL; + +-- This is a private function, so only the db owner need privileges +REVOKE ALL ON FUNCTION CDB_ColumnNames(REGCLASS) FROM PUBLIC; +GRANT EXECUTE ON FUNCTION CDB_ColumnNames(REGCLASS) TO ":DATABASE_USERNAME"; diff --git a/scripts-available/CDB_ColumnType.sql b/scripts-available/CDB_ColumnType.sql new file mode 100644 index 0000000..6b8196e --- /dev/null +++ b/scripts-available/CDB_ColumnType.sql @@ -0,0 +1,17 @@ +-- Function returning the type of a column +CREATE OR REPLACE FUNCTION CDB_ColumnType(REGCLASS, TEXT) +RETURNS information_schema.character_data +AS $$ + + SELECT data_type + FROM information_schema.columns + WHERE + table_name IN (SELECT CDB_UserTables()) + AND table_name = '' || $1 || '' + AND column_name = '' || quote_ident($2) || ''; + +$$ LANGUAGE SQL; + +-- This is a private function, so only the db owner need privileges +REVOKE ALL ON FUNCTION CDB_ColumnType(REGCLASS, TEXT) FROM PUBLIC; +GRANT EXECUTE ON FUNCTION CDB_ColumnType(REGCLASS, TEXT) TO ":DATABASE_USERNAME"; diff --git a/scripts-available/CDB_DateToNumber.sql b/scripts-available/CDB_DateToNumber.sql new file mode 100644 index 0000000..c879112 --- /dev/null +++ b/scripts-available/CDB_DateToNumber.sql @@ -0,0 +1,15 @@ +-- Convert timestamp to double precision +-- +CREATE OR REPLACE FUNCTION CDB_DateToNumber(input timestamp) +RETURNS double precision AS $$ +DECLARE output double precision DEFAULT NULL; +BEGIN + BEGIN + SELECT extract (EPOCH FROM input) INTO output; + EXCEPTION WHEN OTHERS THEN + RETURN NULL; + END; +RETURN output; +END; +$$ +LANGUAGE 'plpgsql' STABLE STRICT; diff --git a/scripts-available/CDB_DigitSeparator.sql b/scripts-available/CDB_DigitSeparator.sql new file mode 100644 index 0000000..100ec57 --- /dev/null +++ b/scripts-available/CDB_DigitSeparator.sql @@ -0,0 +1,53 @@ +-- Find thousand and decimal digits separators +CREATE OR REPLACE FUNCTION CDB_DigitSeparator (rel REGCLASS, fld TEXT, OUT t CHAR, OUT d CHAR) +as $$ +DECLARE + sql TEXT; + rec RECORD; +BEGIN + + -- We're only interested in rows with either "," or '.' + sql := 'SELECT ' || quote_ident(fld) || ' as f FROM ' || rel::text + || ' WHERE ' || quote_ident(fld) || ' ~ ''[,.]'''; + + FOR rec IN EXECUTE sql + LOOP + -- Any separator appearing more than once + -- will be assumed to be thousand separator + IF rec.f ~ ',.*,' THEN + t := ','; d := '.'; + RETURN; + ELSIF rec.f ~ '\..*\.' THEN + t := '.'; d := ','; + RETURN; + END IF; + + -- If both separator are present, rightmost + -- will be assumed to be decimal separator + IF rec.f ~ '\.' AND rec.f ~ ',' THEN + rec.f = reverse(rec.f); + IF strpos(rec.f, ',') < strpos(rec.f, '.') THEN + t := '.'; d := ','; + ELSE + t := ','; d := '.'; + END IF; + RETURN; + END IF; + + -- A separator NOT followed by 3 digits + -- will be assumed to be decimal separator + IF rec.f ~ ',' AND rec.f !~ '(,[0-9]{3}$)|(,[0-9]{3}[,.])' THEN + t := '.'; d := ','; + RETURN; + ELSIF rec.f ~ '\.' AND rec.f !~ '(\.[0-9]{3}$)|(\.[0-9]{3}[,.])' THEN + t := ','; d := '.'; + RETURN; + END IF; + + -- Otherwise continue looking + + END LOOP; + +END +$$ +LANGUAGE 'plpgsql' STABLE STRICT; diff --git a/scripts-available/CDB_HeadsTailsBins.sql b/scripts-available/CDB_HeadsTailsBins.sql new file mode 100644 index 0000000..1a3b353 --- /dev/null +++ b/scripts-available/CDB_HeadsTailsBins.sql @@ -0,0 +1,46 @@ +-- +-- Determine the Heads/Tails classifications from a numeric array +-- +-- @param in_array A numeric array of numbers to determine the best +-- bins based on the Heads/Tails method. +-- +-- @param breaks The number of bins you want to find. +-- +-- + +CREATE OR REPLACE FUNCTION CDB_HeadsTailsBins ( in_array NUMERIC[], breaks INT) RETURNS NUMERIC[] as $$ +DECLARE + element_count INT4; + arr_mean numeric; + i INT := 2; + reply numeric[]; +BEGIN + -- get the total size of our row + element_count := array_upper(in_array, 1) - array_lower(in_array, 1); + -- ensure the ordering of in_array + SELECT array_agg(e) INTO in_array FROM (SELECT unnest(in_array) e ORDER BY e) x; + -- stop if no rows + IF element_count IS NULL THEN + RETURN NULL; + END IF; + -- stop if our breaks are more than our input array size + IF element_count < breaks THEN + RETURN in_array; + END IF; + + -- get our mean value + SELECT avg(v) INTO arr_mean FROM ( SELECT unnest(in_array) as v ) x; + + reply = Array[arr_mean]; + -- slice our bread + LOOP + IF i > breaks THEN EXIT; END IF; + SELECT avg(e) INTO arr_mean FROM ( SELECT unnest(in_array) e) x WHERE e > reply[i-1]; + IF arr_mean IS NOT NULL THEN + reply = array_append(reply, arr_mean); + END IF; + i := i+1; + END LOOP; + RETURN reply; +END; +$$ language plpgsql IMMUTABLE; \ No newline at end of file diff --git a/scripts-available/CDB_Hexagon.sql b/scripts-available/CDB_Hexagon.sql new file mode 100644 index 0000000..131aa12 --- /dev/null +++ b/scripts-available/CDB_Hexagon.sql @@ -0,0 +1,135 @@ +-- Return an Hexagon with given center and side (or maximal radius) +CREATE OR REPLACE FUNCTION CDB_MakeHexagon(center GEOMETRY, radius FLOAT8) +RETURNS GEOMETRY +AS $$ + SELECT ST_MakePolygon(ST_MakeLine(geom)) + FROM + ( + SELECT (ST_DumpPoints(ST_ExteriorRing(ST_Buffer($1, $2, 3)))).* + ) as points + WHERE path[1] % 2 != 0 +$$ LANGUAGE 'sql' IMMUTABLE STRICT; + +-- +-- Fill given extent with an hexagonal coverage +-- +-- @param ext Extent to fill. Only hexagons with center point falling +-- inside the extent (or at the lower or leftmost edge) will +-- be emitted. The returned hexagons will have the same SRID +-- as this extent. +-- +-- @param side Side measure for the hexagon. +-- Maximum diameter will be 2 * side. +-- +-- @param origin Optional origin to allow for exact tiling. +-- If omitted the origin will be 0,0. +-- The parameter is checked for having the same SRID +-- as the extent. +-- +-- +DROP FUNCTION IF EXISTS CDB_HexagonGrid(ext GEOMETRY, side FLOAT8); +CREATE OR REPLACE FUNCTION CDB_HexagonGrid(ext GEOMETRY, side FLOAT8, origin GEOMETRY DEFAULT NULL) +RETURNS SETOF GEOMETRY +AS $$ +DECLARE + h GEOMETRY; -- hexagon + c GEOMETRY; -- center point + rec RECORD; + hstep FLOAT8; -- horizontal step + vstep FLOAT8; -- vertical step + vstart FLOAT8; + vstartary FLOAT8[]; + vstartidx INTEGER; + hskip BIGINT; + hstart FLOAT8; + hend FLOAT8; + vend FLOAT8; + xoff FLOAT8; + yoff FLOAT8; + xgrd FLOAT8; + ygrd FLOAT8; + srid INTEGER; +BEGIN + + -- | | + -- |hstep| + -- ______ ___ | + -- vstep / \ ___ / + -- ______ \ ___ / \ + -- / \ ___ / + -- + -- + RAISE DEBUG 'Side: %', side; + + vstep := side * sqrt(3); -- x 2 ? + hstep := side * 1.5; + + RAISE DEBUG 'vstep: %', vstep; + RAISE DEBUG 'hstep: %', hstep; + + srid := ST_SRID(ext); + + xoff := 0; + yoff := 0; + + IF origin IS NOT NULL THEN + IF ST_SRID(origin) != srid THEN + RAISE EXCEPTION 'SRID mismatch between extent (%) and origin (%)', srid, ST_SRID(origin); + END IF; + xoff := ST_X(origin); + yoff := ST_Y(origin); + END IF; + + RAISE DEBUG 'X offset: %', xoff; + RAISE DEBUG 'Y offset: %', yoff; + + xgrd := side * 0.5; + ygrd := ( side * sqrt(3) ) / 2.0; + RAISE DEBUG 'X grid size: %', xgrd; + RAISE DEBUG 'Y grid size: %', ygrd; + + -- Tweak horizontal start on hstep*2 grid from origin + hskip := ceil((ST_XMin(ext)-xoff)/hstep); + RAISE DEBUG 'hskip: %', hskip; + hstart := xoff + hskip*hstep; + RAISE DEBUG 'hstart: %', hstart; + + -- Tweak vertical start on hstep grid from origin + vstart := yoff + ceil((ST_Ymin(ext)-yoff)/vstep)*vstep; + RAISE DEBUG 'vstart: %', vstart; + + hend := ST_XMax(ext); + vend := ST_YMax(ext); + + IF vstart - (vstep/2.0) < ST_YMin(ext) THEN + vstartary := ARRAY[ vstart + (vstep/2.0), vstart ]; + ELSE + vstartary := ARRAY[ vstart - (vstep/2.0), vstart ]; + END IF; + + vstartidx := abs(hskip)%2; + + RAISE DEBUG 'vstartary: % : %', vstartary[1], vstartary[2]; + RAISE DEBUG 'vstartidx: %', vstartidx; + + c := ST_SetSRID(ST_MakePoint(hstart, vstartary[vstartidx+1]), srid); + h := ST_SnapToGrid(CDB_MakeHexagon(c, side), xoff, yoff, xgrd, ygrd); + vstartidx := (vstartidx + 1) % 2; + WHILE ST_X(c) < hend LOOP -- over X + --RAISE DEBUG 'X loop starts, center point: %', ST_AsText(c); + WHILE ST_Y(c) < vend LOOP -- over Y + --RAISE DEBUG 'Center: %', ST_AsText(c); + --h := ST_SnapToGrid(CDB_MakeHexagon(c, side), xoff, yoff, xgrd, ygrd); + RETURN NEXT h; + h := ST_SnapToGrid(ST_Translate(h, 0, vstep), xoff, yoff, xgrd, ygrd); + c := ST_Translate(c, 0, vstep); -- TODO: drop ? + END LOOP; + -- TODO: translate h direcly ... + c := ST_SetSRID(ST_MakePoint(ST_X(c)+hstep, vstartary[vstartidx+1]), srid); + h := ST_SnapToGrid(CDB_MakeHexagon(c, side), xoff, yoff, xgrd, ygrd); + vstartidx := (vstartidx + 1) % 2; + END LOOP; + + RETURN; +END +$$ LANGUAGE 'plpgsql' IMMUTABLE; diff --git a/scripts-available/CDB_JenksBins.sql b/scripts-available/CDB_JenksBins.sql new file mode 100644 index 0000000..86d967b --- /dev/null +++ b/scripts-available/CDB_JenksBins.sql @@ -0,0 +1,221 @@ +-- +-- Determine the Jenks classifications from a numeric array +-- +-- @param in_array A numeric array of numbers to determine the best +-- bins based on the Jenks method. +-- +-- @param breaks The number of bins you want to find. +-- +-- @param iterations The number of different starting positions to test. +-- +-- @param invert Optional wheter to return the top of each bin (default) +-- or the bottom. BOOLEAN, default=FALSE. +-- +-- + + +CREATE OR REPLACE FUNCTION CDB_JenksBins ( in_array NUMERIC[], breaks INT, iterations INT DEFAULT 5, invert BOOLEAN DEFAULT FALSE) RETURNS NUMERIC[] as $$ +DECLARE + element_count INT4; + arr_mean NUMERIC; + bot INT; + top INT; + tops INT[]; + classes INT[][]; + i INT := 1; j INT := 1; + curr_result NUMERIC[]; + best_result NUMERIC[]; + seedtarget TEXT; + quant NUMERIC[]; + shuffles INT; +BEGIN + -- get the total size of our row + element_count := array_length(in_array, 1); --array_upper(in_array, 1) - array_lower(in_array, 1); + -- ensure the ordering of in_array + SELECT array_agg(e) INTO in_array FROM (SELECT unnest(in_array) e ORDER BY e) x; + -- stop if no rows + IF element_count IS NULL THEN + RETURN NULL; + END IF; + -- stop if our breaks are more than our input array size + IF element_count < breaks THEN + RETURN in_array; + END IF; + + shuffles := LEAST(GREATEST(floor(2500000.0/(element_count::float*iterations::float)), 1), 750)::int; + -- get our mean value + SELECT avg(v) INTO arr_mean FROM ( SELECT unnest(in_array) as v ) x; + + -- assume best is actually Quantile + SELECT CDB_QuantileBins(in_array, breaks) INTO quant; + + -- if data is very very large, just return quant and be done + IF element_count > 5000000 THEN + RETURN quant; + END IF; + + -- change quant into bottom, top markers + LOOP + IF i = 1 THEN + bot = 1; + ELSE + -- use last top to find this bot + bot = top+1; + END IF; + IF i = breaks THEN + top = element_count; + ELSE + SELECT count(*) INTO top FROM ( SELECT unnest(in_array) as v) x WHERE v <= quant[i]; + END IF; + IF i = 1 THEN + classes = ARRAY[ARRAY[bot,top]]; + ELSE + classes = ARRAY_CAT(classes,ARRAY[bot,top]); + END IF; + IF i > breaks THEN EXIT; END IF; + i = i+1; + END LOOP; + + best_result = CDB_JenksBinsIteration( in_array, breaks, classes, invert, element_count, arr_mean, shuffles); + + --set the seed so we can ensure the same results + SELECT setseed(0.4567) INTO seedtarget; + --loop through random starting positions + LOOP + IF j > iterations-1 THEN EXIT; END IF; + i = 1; + tops = ARRAY[element_count]; + LOOP + IF i = breaks THEN EXIT; END IF; + SELECT array_agg(distinct e) INTO tops FROM (SELECT unnest(array_cat(tops, ARRAY[floor(random()*element_count::float)::int])) as e ORDER BY e) x WHERE e != 1; + i = array_length(tops, 1); + END LOOP; + i = 1; + LOOP + IF i > breaks THEN EXIT; END IF; + IF i = 1 THEN + bot = 1; + ELSE + bot = top+1; + END IF; + top = tops[i]; + IF i = 1 THEN + classes = ARRAY[ARRAY[bot,top]]; + ELSE + classes = ARRAY_CAT(classes,ARRAY[bot,top]); + END IF; + i := i+1; + END LOOP; + curr_result = CDB_JenksBinsIteration( in_array, breaks, classes, invert, element_count, arr_mean, shuffles); + + IF curr_result[1] > best_result[1] THEN + best_result = curr_result; + j = j-1; -- if we found a better result, add one more search + END IF; + j = j+1; + END LOOP; + + RETURN (best_result)[2:array_upper(best_result, 1)]; +END; +$$ language plpgsql IMMUTABLE; + + + +-- +-- Perform a single iteration of the Jenks classification +-- + +CREATE OR REPLACE FUNCTION CDB_JenksBinsIteration ( in_array NUMERIC[], breaks INT, classes INT[][], invert BOOLEAN, element_count INT4, arr_mean NUMERIC, max_search INT DEFAULT 50) RETURNS NUMERIC[] as $$ +DECLARE + tmp_val numeric; + new_classes int[][]; + tmp_class int[]; + i INT := 1; + j INT := 1; + side INT := 2; + sdam numeric; + gvf numeric := 0.0; + new_gvf numeric; + arr_gvf numeric[]; + class_avg numeric; + class_max_i INT; + class_min_i INT; + class_max numeric; + class_min numeric; + reply numeric[]; +BEGIN + + -- Calculate the sum of squared deviations from the array mean (SDAM). + SELECT sum((arr_mean - e)^2) INTO sdam FROM ( SELECT unnest(in_array) as e ) x; + --Identify the breaks for the lowest GVF + LOOP + i = 1; + LOOP + -- get our mean + SELECT avg(e) INTO class_avg FROM ( SELECT unnest(in_array[classes[i][1]:classes[i][2]]) as e) x; + -- find the deviation + SELECT sum((class_avg-e)^2) INTO tmp_val FROM ( SELECT unnest(in_array[classes[i][1]:classes[i][2]]) as e ) x; + IF i = 1 THEN + arr_gvf = ARRAY[tmp_val]; + -- init our min/max map for later + class_max = arr_gvf[i]; + class_min = arr_gvf[i]; + class_min_i = 1; + class_max_i = 1; + ELSE + arr_gvf = array_append(arr_gvf, tmp_val); + END IF; + i := i+1; + IF i > breaks THEN EXIT; END IF; + END LOOP; + -- calculate our new GVF + SELECT sdam-sum(e) INTO new_gvf FROM ( SELECT unnest(arr_gvf) as e ) x; + -- if no improvement was made, exit + IF new_gvf < gvf THEN EXIT; END IF; + gvf = new_gvf; + IF j > max_search THEN EXIT; END IF; + j = j+1; + i = 1; + LOOP + --establish directionality (uppward through classes or downward) + IF arr_gvf[i] < class_min THEN + class_min = arr_gvf[i]; + class_min_i = i; + END IF; + IF arr_gvf[i] > class_max THEN + class_max = arr_gvf[i]; + class_max_i = i; + END IF; + i := i+1; + IF i > breaks THEN EXIT; END IF; + END LOOP; + IF class_max_i > class_min_i THEN + class_min_i = class_max_i - 1; + ELSE + class_min_i = class_max_i + 1; + END IF; + --Move from higher class to a lower gid order + IF class_max_i > class_min_i THEN + classes[class_max_i][1] = classes[class_max_i][1] + 1; + classes[class_min_i][2] = classes[class_min_i][2] + 1; + ELSE -- Move from lower class UP into a higher class by gid + classes[class_max_i][2] = classes[class_max_i][2] - 1; + classes[class_min_i][1] = classes[class_min_i][1] - 1; + END IF; + END LOOP; + + i = 1; + LOOP + IF invert = TRUE THEN + side = 1; --default returns bottom side of breaks, invert returns top side + END IF; + reply = array_append(reply, in_array[classes[i][side]]); + i = i+1; + IF i > breaks THEN EXIT; END IF; + END LOOP; + + RETURN array_prepend(gvf, reply); + +END; +$$ language plpgsql IMMUTABLE; + diff --git a/scripts-available/CDB_LatLng.sql b/scripts-available/CDB_LatLng.sql new file mode 100644 index 0000000..d1304c3 --- /dev/null +++ b/scripts-available/CDB_LatLng.sql @@ -0,0 +1,23 @@ +-- +-- Create a valid GEOMETRY in 4326 from a lat/lng pair +-- +-- @param lat A numeric latitude value. +-- +-- @param lng A numeric longitude value. +-- +-- + +CREATE OR REPLACE FUNCTION CDB_LatLng (lat NUMERIC, lng NUMERIC) RETURNS geometry as $$ +BEGIN + -- this function is silly + RETURN ST_SetSRID(ST_MakePoint(lng,lat),4326); +END; +$$ language plpgsql IMMUTABLE; + +CREATE OR REPLACE FUNCTION CDB_LatLng (lat FLOAT8, lng FLOAT8) RETURNS geometry as $$ +BEGIN + -- this function is silly + RETURN ST_SetSRID(ST_MakePoint(lng,lat),4326); +END; +$$ language plpgsql IMMUTABLE; + diff --git a/scripts-available/CDB_QuantileBins.sql b/scripts-available/CDB_QuantileBins.sql new file mode 100644 index 0000000..d0a0374 --- /dev/null +++ b/scripts-available/CDB_QuantileBins.sql @@ -0,0 +1,32 @@ +-- +-- Determine the Quantile classifications from a numeric array +-- +-- @param in_array A numeric array of numbers to determine the best +-- bins based on the Quantile method. +-- +-- @param breaks The number of bins you want to find. +-- +-- +CREATE OR REPLACE FUNCTION CDB_QuantileBins ( in_array NUMERIC[], breaks INT) RETURNS NUMERIC[] as $$ +DECLARE + element_count INT4; + break_size numeric; + tmp_val numeric; + i INT := 1; + reply numeric[]; +BEGIN + -- get our unique values + SELECT array_agg(e) INTO in_array FROM (SELECT unnest(in_array) e GROUP BY e ORDER BY e ASC) x; + -- get the total size of our row + element_count := array_upper(in_array, 1) - array_lower(in_array, 1); + break_size := element_count::numeric / breaks; + -- slice our bread + LOOP + IF i > breaks THEN EXIT; END IF; + SELECT e INTO tmp_val FROM ( SELECT unnest(in_array) e LIMIT 1 OFFSET round(break_size * i)) x; + reply = array_append(reply, tmp_val); + i := i+1; + END LOOP; + RETURN reply; +END; +$$ language plpgsql IMMUTABLE; \ No newline at end of file diff --git a/scripts-available/CDB_QueryStatements.sql b/scripts-available/CDB_QueryStatements.sql new file mode 100644 index 0000000..9c8309a --- /dev/null +++ b/scripts-available/CDB_QueryStatements.sql @@ -0,0 +1,13 @@ +-- Return an array of statements found in the given query text +-- +-- Curtesy of Hubert Lubaczewski (depesz) +-- +CREATE OR REPLACE FUNCTION CDB_QueryStatements(query text) +RETURNS SETOF TEXT AS $$ + SELECT stmt FROM ( + SELECT btrim(q[1], E' \n\t\r;') as stmt FROM ( + SELECT regexp_matches( $1, $REG$((?:[^'"$;]+|"[^"]*"|'(?:[^']*|'')*'|(\$[^$]*\$).*?\2)+)$REG$, 'g' ) as q + ) i + ) j + WHERE stmt <> ''; +$$ language sql IMMUTABLE STRICT; diff --git a/scripts-available/CDB_QueryTables.sql b/scripts-available/CDB_QueryTables.sql new file mode 100644 index 0000000..1ddf675 --- /dev/null +++ b/scripts-available/CDB_QueryTables.sql @@ -0,0 +1,62 @@ +-- Return an array of table names scanned by a given query +-- +-- Requires PostgreSQL 9.x+ +-- +CREATE OR REPLACE FUNCTION CDB_QueryTables(query text) +RETURNS name[] +AS $$ +DECLARE + exp XML; + tables NAME[]; + rec RECORD; + rec2 RECORD; +BEGIN + + tables := '{}'; + + FOR rec IN SELECT CDB_QueryStatements(query) q LOOP + + IF NOT ( rec.q ilike 'select %' or rec.q ilike 'with %' ) THEN + --RAISE WARNING 'Skipping %', rec.q; + CONTINUE; + END IF; + + BEGIN + EXECUTE 'EXPLAIN (FORMAT XML) ' || rec.q INTO STRICT exp; + EXCEPTION WHEN others THEN + -- TODO: if error is 'relation "xxxxxx" does not exist', take xxxxxx as + -- the affected table ? + RAISE WARNING 'CDB_QueryTables cannot explain query: % (%: %)', rec.q, SQLSTATE, SQLERRM; + RAISE EXCEPTION '%', SQLERRM; + CONTINUE; + END; + + -- Now need to extract all values of + + --RAISE DEBUG 'Explain: %', exp; + + FOR rec2 IN WITH + inp AS ( SELECT xpath('//x:Relation-Name/text()', exp, ARRAY[ARRAY['x', 'http://www.postgresql.org/2009/explain']]) as x ) + SELECT unnest(x)::name as p from inp + LOOP + --RAISE DEBUG 'tab: %', rec2.p; + tables := array_append(tables, rec2.p); + END LOOP; + + -- RAISE DEBUG 'Tables: %', tables; + + END LOOP; + + -- RAISE DEBUG 'Tables: %', tables; + + -- Remove duplicates and sort by name + IF array_upper(tables, 1) > 0 THEN + WITH dist as ( SELECT DISTINCT unnest(tables)::text as p ORDER BY p ) + SELECT array_agg(p) from dist into tables; + END IF; + + --RAISE DEBUG 'Tables: %', tables; + + return tables; +END +$$ LANGUAGE 'plpgsql' VOLATILE STRICT; diff --git a/scripts-available/CDB_Quota.sql b/scripts-available/CDB_Quota.sql new file mode 100644 index 0000000..60ac771 --- /dev/null +++ b/scripts-available/CDB_Quota.sql @@ -0,0 +1,50 @@ +-- Return the estimated size of user data. Used for quota checking. +CREATE OR REPLACE FUNCTION CDB_UserDataSize() +RETURNS bigint AS +$$ + -- TODO: double check this query. Maybe use CDB_TableMetadata for lookup ? + -- also, it's "table_name" sounds sensible to search_path + -- + -- NOTE: division by 2 is an hack for the_geom_webmercator + -- + SELECT coalesce(int8(sum(pg_total_relation_size(quote_ident(table_name))) / 2), 0) + AS quota + FROM information_schema.tables + WHERE table_catalog = current_database() AND table_schema = 'public' + AND table_name != 'spatial_ref_sys' + AND table_name != 'cdb_tablemetadata' + AND table_type = 'BASE TABLE'; +$$ +LANGUAGE 'sql' VOLATILE; + +CREATE OR REPLACE FUNCTION CDB_CheckQuota() +RETURNS trigger AS +$$ +DECLARE + + pbfact float8; + qmax int8; + dice float8; + quota float8; +BEGIN + + pbfact := TG_ARGV[0]; + dice := random(); + + -- RAISE DEBUG 'CDB_CheckQuota enter: pbfact=% dice=%', pbfact, dice; + + IF dice < pbfact THEN + RAISE DEBUG 'Checking quota on table % (dice:%, needed:<%)', TG_RELID::text, dice, pbfact; + qmax := public._CDB_UserQuotaInBytes(); + SELECT CDB_UserDataSize() INTO quota; + IF quota > qmax THEN + RAISE EXCEPTION 'Quota exceeded by %KB', (quota-qmax)/1024; + ELSE RAISE DEBUG 'User quota in bytes: % < % (max allowed)', quota, qmax; + END IF; + -- ELSE RAISE DEBUG 'Not checking quota on table % (dice:%, needed:<%)', TG_RELID::text, dice, pbfact; + END IF; + + RETURN NEW; +END; +$$ +LANGUAGE 'plpgsql' VOLATILE; diff --git a/scripts-available/CDB_RandomTids.sql b/scripts-available/CDB_RandomTids.sql new file mode 100644 index 0000000..73bc8c8 --- /dev/null +++ b/scripts-available/CDB_RandomTids.sql @@ -0,0 +1,69 @@ + +-- { +-- +-- Return random TIDs in a table. +-- +-- You can use like this: +-- +-- SELECT * FROM lots_of_points WHERE ctid = ANY ( +-- ARRAY[ (SELECT CDB_RandomTids('lots_of_points', 100000)) ] +-- ); +-- +-- NOTE: +-- It currently doesn't really do it random, but in a +-- equally-distributed way among all tuples. +-- +-- +-- }{ +CREATE OR REPLACE FUNCTION CDB_RandomTids(in_table regclass, in_nsamples integer) + RETURNS tid[] +AS $$ +DECLARE + class_info RECORD; + tuples_per_page INTEGER; + needed_pages INTEGER; + skip_pages INTEGER; + tidlist TID[]; + pnrec RECORD; +BEGIN + + -- (#) estimate pages and tuples-per-page + -- HINT: pg_class.relpages, pg_class.reltuples + SELECT relpages, reltuples + FROM pg_class WHERE oid = in_table + INTO class_info; + + RAISE DEBUG 'Table % has % pages and % tuples', + in_table::text, class_info.relpages, class_info.reltuples; + + IF in_nsamples > class_info.reltuples THEN + RAISE WARNING 'Table has less tuples than requested'; + -- should just perform a sequencial scan here... + END IF; + + tuples_per_page := floor(class_info.reltuples/class_info.relpages); + needed_pages := ceil(in_nsamples::real/tuples_per_page); + + RAISE DEBUG '% tuples per page, we need % pages for % tuples', + tuples_per_page, needed_pages, in_nsamples; + + -- (#) select random pages + -- TODO: see how good this is first + + skip_pages := floor( (class_info.relpages-needed_pages)/(needed_pages+1) ); + + RAISE DEBUG 'we are going to skip % pages at each iteration', + skip_pages; + + SELECT array_agg(t) FROM ( + SELECT '(' || pn || ',' || tn || ')' as t + FROM generate_series(1, tuples_per_page) x(tn), + generate_series(skip_pages+1, class_info.relpages, skip_pages) y(pn) ) f + INTO tidlist; + + RETURN tidlist; + +END +$$ LANGUAGE 'plpgsql' STABLE STRICT; +-- } + diff --git a/scripts-available/CDB_RectangleGrid.sql b/scripts-available/CDB_RectangleGrid.sql new file mode 100644 index 0000000..9410c99 --- /dev/null +++ b/scripts-available/CDB_RectangleGrid.sql @@ -0,0 +1,96 @@ +-- +-- Fill given extent with a rectangular coverage +-- +-- @param ext Extent to fill. Only rectangles with center point falling +-- inside the extent (or at the lower or leftmost edge) will +-- be emitted. The returned hexagons will have the same SRID +-- as this extent. +-- +-- @param width With of each rectangle +-- +-- @param height Height of each rectangle +-- +-- @param origin Optional origin to allow for exact tiling. +-- If omitted the origin will be 0,0. +-- The parameter is checked for having the same SRID +-- as the extent. +-- +-- +CREATE OR REPLACE FUNCTION CDB_RectangleGrid(ext GEOMETRY, width FLOAT8, height FLOAT8, origin GEOMETRY DEFAULT NULL) +RETURNS SETOF GEOMETRY +AS $$ +DECLARE + h GEOMETRY; -- rectangle cell + hstep FLOAT8; -- horizontal step + vstep FLOAT8; -- vertical step + hw FLOAT8; -- half width + hh FLOAT8; -- half height + vstart FLOAT8; + hstart FLOAT8; + hend FLOAT8; + vend FLOAT8; + xoff FLOAT8; + yoff FLOAT8; + xgrd FLOAT8; + ygrd FLOAT8; + x FLOAT8; + y FLOAT8; + srid INTEGER; +BEGIN + + srid := ST_SRID(ext); + + xoff := 0; + yoff := 0; + + IF origin IS NOT NULL THEN + IF ST_SRID(origin) != srid THEN + RAISE EXCEPTION 'SRID mismatch between extent (%) and origin (%)', srid, ST_SRID(origin); + END IF; + xoff := ST_X(origin); + yoff := ST_Y(origin); + END IF; + + --RAISE DEBUG 'X offset: %', xoff; + --RAISE DEBUG 'Y offset: %', yoff; + + hw := width/2.0; + hh := height/2.0; + + xgrd := hw; + ygrd := hh; + --RAISE DEBUG 'X grid size: %', xgrd; + --RAISE DEBUG 'Y grid size: %', ygrd; + + hstep := width; + vstep := height; + + -- Tweak horizontal start on hstep grid from origin + hstart := xoff + ceil((ST_XMin(ext)-xoff)/hstep)*hstep; + --RAISE DEBUG 'hstart: %', hstart; + + -- Tweak vertical start on vstep grid from origin + vstart := yoff + ceil((ST_Ymin(ext)-yoff)/vstep)*vstep; + --RAISE DEBUG 'vstart: %', vstart; + + hend := ST_XMax(ext); + vend := ST_YMax(ext); + + --RAISE DEBUG 'hend: %', hend; + --RAISE DEBUG 'vend: %', vend; + + x := hstart; + WHILE x < hend LOOP -- over X + y := vstart; + h := ST_MakeEnvelope(x-hw, y-hh, x+hw, y+hh, srid); + WHILE y < vend LOOP -- over Y + RETURN NEXT h; + h := ST_Translate(h, 0, vstep); + y := yoff + round(((y + vstep)-yoff)/ygrd)*ygrd; -- round to grid + END LOOP; + x := xoff + round(((x + hstep)-xoff)/xgrd)*xgrd; -- round to grid + END LOOP; + + RETURN; +END +$$ LANGUAGE 'plpgsql' IMMUTABLE; diff --git a/scripts-available/CDB_StringToDate.sql b/scripts-available/CDB_StringToDate.sql new file mode 100644 index 0000000..cbdf4ec --- /dev/null +++ b/scripts-available/CDB_StringToDate.sql @@ -0,0 +1,19 @@ +-- Convert string to date +-- +CREATE OR REPLACE FUNCTION CDB_StringToDate(input character varying) +RETURNS date AS $$ +DECLARE output DATE DEFAULT NULL; +BEGIN + BEGIN + output := input::date; + EXCEPTION WHEN OTHERS THEN + BEGIN + SELECT to_timestamp(input::integer) INTO output; + EXCEPTION WHEN OTHERS THEN + RETURN NULL; + END; + END; +RETURN output; +END; +$$ +LANGUAGE 'plpgsql' STABLE STRICT; diff --git a/scripts-available/CDB_TableIndexes.sql b/scripts-available/CDB_TableIndexes.sql new file mode 100644 index 0000000..c6871fe --- /dev/null +++ b/scripts-available/CDB_TableIndexes.sql @@ -0,0 +1,26 @@ +-- Function returning indexes for a table +CREATE OR REPLACE FUNCTION CDB_TableIndexes(REGCLASS) +RETURNS TABLE(index_name name, index_unique bool, index_primary bool, index_keys text array) +AS $$ + + SELECT pg_class.relname as index_name, + idx.indisunique as index_unique, + idx.indisprimary as index_primary, + ARRAY( + SELECT pg_get_indexdef(idx.indexrelid, k + 1, true) + FROM generate_subscripts(idx.indkey, 1) as k + ORDER BY k + ) as index_keys + FROM pg_indexes, + pg_index as idx + JOIN pg_class + ON pg_class.oid = idx.indexrelid + WHERE pg_indexes.tablename = '' || $1 || '' + AND '' || $1 || '' IN (SELECT CDB_UserTables()) + AND pg_class.relname=pg_indexes.indexname; + +$$ LANGUAGE SQL; + +-- This is a private function, so only the db owner need privileges +REVOKE ALL ON FUNCTION CDB_TableIndexes(REGCLASS) FROM PUBLIC; +GRANT EXECUTE ON FUNCTION CDB_TableIndexes(REGCLASS) TO ":DATABASE_USERNAME"; diff --git a/scripts-available/CDB_TableMetadata.sql b/scripts-available/CDB_TableMetadata.sql new file mode 100644 index 0000000..5ebbc19 --- /dev/null +++ b/scripts-available/CDB_TableMetadata.sql @@ -0,0 +1,96 @@ + +CREATE TABLE IF NOT EXISTS + public.CDB_TableMetadata ( + tabname regclass not null primary key, + updated_at timestamp with time zone not null default now() + ); + +-- Anyone can see this, but updates are only possible trough +-- the security definer trigger +GRANT SELECT ON public.CDB_TableMetadata TO public; + +-- +-- Trigger logging updated_at in the CDB_TableMetadata +-- and notifying cdb_tabledata_update with table name as payload. +-- +-- Attach to tables like this: +-- +-- CREATE trigger track_updates +-- AFTER INSERT OR UPDATE OR TRUNCATE OR DELETE ON +-- FOR EACH STATEMENT +-- EXECUTE PROCEDURE cdb_tablemetadata_trigger(); +-- +-- NOTE: _never_ attach to CDB_TableMetadata ... +-- +CREATE OR REPLACE FUNCTION CDB_TableMetadata_Trigger() +RETURNS trigger AS +$$ +BEGIN + -- Guard against infinite loop + IF TG_RELID = 'public.CDB_TableMetadata'::regclass::oid THEN + RETURN NULL; + END IF; + + -- Cleanup stale entries + DELETE FROM public.CDB_TableMetadata + WHERE NOT EXISTS ( + SELECT oid FROM pg_class WHERE oid = tabname + ); + + WITH nv as ( + SELECT TG_RELID as tabname, NOW() as t + ), updated as ( + UPDATE public.CDB_TableMetadata x SET updated_at = nv.t + FROM nv WHERE x.tabname = nv.tabname + RETURNING x.tabname + ) + INSERT INTO public.CDB_TableMetadata SELECT nv.* + FROM nv LEFT JOIN updated USING(tabname) + WHERE updated.tabname IS NULL; + + RETURN NULL; +END; +$$ +LANGUAGE plpgsql VOLATILE SECURITY DEFINER; + +-- +-- Trigger invalidating varnish whenever CDB_TableMetadata +-- record change. +-- +CREATE OR REPLACE FUNCTION _CDB_TableMetadata_Updated() +RETURNS trigger AS +$$ +DECLARE + tabname TEXT; +BEGIN + + IF TG_OP = 'UPDATE' or TG_OP = 'INSERT' THEN + tabname = NEW.tabname; + ELSE + tabname = OLD.tabname; + END IF; + + -- Notify table data update + -- This needs a little bit more of research regarding security issues + -- see https://github.com/CartoDB/cartodb/pull/241 + -- PERFORM pg_notify('cdb_tabledata_update', tabname); + + --RAISE NOTICE 'Table % was updated', tabname; + + -- This will be needed until we'll have someone listening + -- on the event we just broadcasted: + -- + -- LISTEN cdb_tabledata_update; + -- + PERFORM cdb_invalidate_varnish(tabname); + + RETURN NULL; +END; +$$ +LANGUAGE plpgsql VOLATILE SECURITY DEFINER; + +DROP TRIGGER IF EXISTS table_modified ON CDB_TableMetadata; +CREATE TRIGGER table_modified AFTER INSERT OR UPDATE OR DELETE +ON CDB_TableMetadata FOR EACH ROW EXECUTE PROCEDURE + _CDB_TableMetadata_Updated(); + diff --git a/scripts-available/CDB_TransformToWebmercator.sql b/scripts-available/CDB_TransformToWebmercator.sql new file mode 100644 index 0000000..d7ba6fe --- /dev/null +++ b/scripts-available/CDB_TransformToWebmercator.sql @@ -0,0 +1,78 @@ +-- +-- Function to "safely" transform to webmercator +-- +-- This function works around the existance of a valid range +-- for web mercator by "clipping" anything outside to the valid +-- range. +-- +CREATE OR REPLACE FUNCTION CDB_TransformToWebmercator(geom geometry) +RETURNS geometry +AS +$$ +DECLARE + valid_extent GEOMETRY; + latlon_input GEOMETRY; + clipped_input GEOMETRY; + to_webmercator GEOMETRY; + ret GEOMETRY; +BEGIN + + IF ST_Srid(geom) = 3857 THEN + RETURN geom; + END IF; + + -- This is the valid web mercator extent + -- + -- NOTE: some sources set the valid latitude range + -- to -85.0511 to 85.0511 but as long as proj + -- does not complain we are happy + -- + valid_extent := ST_MakeEnvelope(-180, -89, 180, 89, 4326); + + -- Then we transform to WGS84 latlon, which is + -- where we have known coordinates for the clipping + -- + latlon_input := ST_Transform(geom, 4326); + + -- Don't bother clipping if the geometry boundary doesn't + -- go outside the valid extent. + IF latlon_input @ valid_extent THEN + RETURN ST_Transform(latlon_input, 3857); + END IF; + + -- Since we're going to use ST_Intersection on input + -- we'd better ensure the input is valid + -- TODO: only do this if the first ST_Intersection fails ? + IF ST_Dimension(geom) != 0 AND + -- See http://trac.osgeo.org/postgis/ticket/1719 + GeometryType(geom) != 'GEOMETRYCOLLECTION' + THEN + BEGIN + latlon_input := ST_MakeValid(latlon_input); + EXCEPTION + WHEN OTHERS THEN + -- See http://github.com/Vizzuality/cartodb/issues/931 + RAISE WARNING 'Could not clean input geometry: %', SQLERRM; + RETURN NULL; + END; + latlon_input := ST_CollectionExtract(latlon_input, ST_Dimension(geom)+1); + END IF; + + -- Then we clip, trying to retain the input type + -- TODO: catch exceptions here too ? + clipped_input := ST_Intersection(latlon_input, valid_extent); + + -- We transform to web mercator + to_webmercator := ST_Transform(clipped_input, 3857); + + -- Finally we convert EMPTY to NULL + -- See https://github.com/Vizzuality/cartodb/issues/706 + -- And retain "multi" status + ret := CASE WHEN ST_IsEmpty(to_webmercator) THEN NULL::geometry + WHEN GeometryType(geom) LIKE 'MULTI%' THEN ST_Multi(to_webmercator) + ELSE to_webmercator + END; + + RETURN ret; +END +$$ LANGUAGE 'plpgsql' IMMUTABLE STRICT; diff --git a/scripts-available/CDB_UserTables.sql b/scripts-available/CDB_UserTables.sql new file mode 100644 index 0000000..fb60fcd --- /dev/null +++ b/scripts-available/CDB_UserTables.sql @@ -0,0 +1,39 @@ +-- Function returning list of cartodb user tables +-- +-- The optional argument restricts the result to tables +-- of the specified access type. +-- +-- Currently accepted permissions are: 'public', 'private' or 'all' +-- +DROP FUNCTION IF EXISTS CDB_UserTables(); -- replaced by: +CREATE OR REPLACE FUNCTION CDB_UserTables(perm text DEFAULT 'all') +RETURNS SETOF information_schema.sql_identifier +AS $$ + WITH usertables AS ( + -- TODO: query CDB_TableMetadata for this ? + -- See http://github.com/CartoDB/cartodb/issues/254#issuecomment-26044777 + SELECT table_name as t + FROM information_schema.tables + WHERE + table_type='BASE TABLE' + AND table_schema='public' + AND table_name NOT IN ( + 'cdb_tablemetadata', + 'spatial_ref_sys' + ) + ), perms AS ( + SELECT t, has_table_privilege('publicuser', 'public.'||t, 'SELECT') as p + FROM usertables + ) + SELECT t FROM perms + WHERE p = CASE WHEN $1 = 'private' THEN false + WHEN $1 = 'public' THEN true + ELSE not p -- none + END + OR $1 = 'all' + ; +$$ LANGUAGE 'sql'; + +-- This is a private function, so only the db owner need privileges +REVOKE ALL ON FUNCTION CDB_UserTables(text) FROM PUBLIC; +GRANT EXECUTE ON FUNCTION CDB_UserTables(text) TO ":DATABASE_USERNAME"; diff --git a/scripts-available/CDB_XYZ.sql b/scripts-available/CDB_XYZ.sql new file mode 100644 index 0000000..92e7dae --- /dev/null +++ b/scripts-available/CDB_XYZ.sql @@ -0,0 +1,79 @@ + +-- { +-- Return pixel resolution at the given zoom level +-- }{ +CREATE OR REPLACE FUNCTION CDB_XYZ_Resolution(z INTEGER) +RETURNS FLOAT8 +AS $$ +DECLARE + earth_circumference FLOAT8; + tile_size INTEGER; + full_resolution FLOAT8; +BEGIN + + -- Earth equatorial circumference in meters (according to wikipedia) + earth_circumference := 40075017; + + -- Size of each tile in pixels (1:1 aspect ratio) + tile_size := 256; + + full_resolution := earth_circumference/tile_size; + + RETURN full_resolution / (power(2,z)); + +END +$$ LANGUAGE 'plpgsql' IMMUTABLE STRICT; +-- } + +-- { +-- Returns a polygon representing the bounding box of a given XYZ tile +-- +-- SRID of the returned polygon is forceably 3857 +-- +-- }{ +CREATE OR REPLACE FUNCTION CDB_XYZ_Extent(x INTEGER, y INTEGER, z INTEGER) +RETURNS GEOMETRY +AS $$ +DECLARE + origin_shift FLOAT8; + initial_resolution FLOAT8; + tile_geo_size FLOAT8; + pixres FLOAT8; + xmin FLOAT8; + ymin FLOAT8; + xmax FLOAT8; + ymax FLOAT8; + earth_circumference FLOAT8; + tile_size INTEGER; +BEGIN + + -- Size of each tile in pixels (1:1 aspect ratio) + tile_size := 256; + + initial_resolution := CDB_XYZ_Resolution(0); + --RAISE DEBUG 'Initial resolution: %', initial_resolution; + + origin_shift := (initial_resolution * tile_size) / 2.0; + -- RAISE DEBUG 'Origin shift (after): %', origin_shift; + + pixres := initial_resolution / (power(2,z)); + --RAISE DEBUG 'Pixel resolution: %', pixres; + + tile_geo_size = tile_size * pixres; + --RAISE DEBUG 'Tile_geo_size: %', tile_geo_size; + + xmin := -origin_shift + x*tile_geo_size; + xmax := -origin_shift + (x+1)*tile_geo_size; + --RAISE DEBUG 'xmin: %', xmin; + --RAISE DEBUG 'xmax: %', xmax; + + ymin := origin_shift - y*tile_geo_size; + ymax := origin_shift - (y+1)*tile_geo_size; + --RAISE DEBUG 'ymin: %', ymin; + --RAISE DEBUG 'ymax: %', ymax; + + RETURN ST_MakeEnvelope(xmin, ymin, xmax, ymax, 3857); + +END +$$ LANGUAGE 'plpgsql' IMMUTABLE STRICT; +-- } diff --git a/scripts-enabled/000-CDB_DateToNumber.sql b/scripts-enabled/000-CDB_DateToNumber.sql new file mode 120000 index 0000000..6c5d30b --- /dev/null +++ b/scripts-enabled/000-CDB_DateToNumber.sql @@ -0,0 +1 @@ +../scripts-available/CDB_DateToNumber.sql \ No newline at end of file diff --git a/scripts-enabled/010-CDB_DigitSeparator.sql b/scripts-enabled/010-CDB_DigitSeparator.sql new file mode 120000 index 0000000..da031d8 --- /dev/null +++ b/scripts-enabled/010-CDB_DigitSeparator.sql @@ -0,0 +1 @@ +../scripts-available/CDB_DigitSeparator.sql \ No newline at end of file diff --git a/scripts-enabled/020-CDB_HeadsTailsBins.sql b/scripts-enabled/020-CDB_HeadsTailsBins.sql new file mode 120000 index 0000000..388c618 --- /dev/null +++ b/scripts-enabled/020-CDB_HeadsTailsBins.sql @@ -0,0 +1 @@ +../scripts-available/CDB_HeadsTailsBins.sql \ No newline at end of file diff --git a/scripts-enabled/030-CDB_Hexagon.sql b/scripts-enabled/030-CDB_Hexagon.sql new file mode 120000 index 0000000..a8a27a4 --- /dev/null +++ b/scripts-enabled/030-CDB_Hexagon.sql @@ -0,0 +1 @@ +../scripts-available/CDB_Hexagon.sql \ No newline at end of file diff --git a/scripts-enabled/040-CDB_JenksBins.sql b/scripts-enabled/040-CDB_JenksBins.sql new file mode 120000 index 0000000..9d0d5f8 --- /dev/null +++ b/scripts-enabled/040-CDB_JenksBins.sql @@ -0,0 +1 @@ +../scripts-available/CDB_JenksBins.sql \ No newline at end of file diff --git a/scripts-enabled/050-CDB_LatLng.sql b/scripts-enabled/050-CDB_LatLng.sql new file mode 120000 index 0000000..f8a546a --- /dev/null +++ b/scripts-enabled/050-CDB_LatLng.sql @@ -0,0 +1 @@ +../scripts-available/CDB_LatLng.sql \ No newline at end of file diff --git a/scripts-enabled/060-CDB_QuantileBins.sql b/scripts-enabled/060-CDB_QuantileBins.sql new file mode 120000 index 0000000..a81f510 --- /dev/null +++ b/scripts-enabled/060-CDB_QuantileBins.sql @@ -0,0 +1 @@ +../scripts-available/CDB_QuantileBins.sql \ No newline at end of file diff --git a/scripts-enabled/070-CDB_QueryStatements.sql b/scripts-enabled/070-CDB_QueryStatements.sql new file mode 120000 index 0000000..bc5ecb6 --- /dev/null +++ b/scripts-enabled/070-CDB_QueryStatements.sql @@ -0,0 +1 @@ +../scripts-available/CDB_QueryStatements.sql \ No newline at end of file diff --git a/scripts-enabled/080-CDB_QueryTables.sql b/scripts-enabled/080-CDB_QueryTables.sql new file mode 120000 index 0000000..758bef3 --- /dev/null +++ b/scripts-enabled/080-CDB_QueryTables.sql @@ -0,0 +1 @@ +../scripts-available/CDB_QueryTables.sql \ No newline at end of file diff --git a/scripts-enabled/090-CDB_Quota.sql b/scripts-enabled/090-CDB_Quota.sql new file mode 120000 index 0000000..c4cbdd7 --- /dev/null +++ b/scripts-enabled/090-CDB_Quota.sql @@ -0,0 +1 @@ +../scripts-available/CDB_Quota.sql \ No newline at end of file diff --git a/scripts-enabled/100-CDB_RandomTids.sql b/scripts-enabled/100-CDB_RandomTids.sql new file mode 120000 index 0000000..ea359d4 --- /dev/null +++ b/scripts-enabled/100-CDB_RandomTids.sql @@ -0,0 +1 @@ +../scripts-available/CDB_RandomTids.sql \ No newline at end of file diff --git a/scripts-enabled/110-CDB_RectangleGrid.sql b/scripts-enabled/110-CDB_RectangleGrid.sql new file mode 120000 index 0000000..95f90da --- /dev/null +++ b/scripts-enabled/110-CDB_RectangleGrid.sql @@ -0,0 +1 @@ +../scripts-available/CDB_RectangleGrid.sql \ No newline at end of file diff --git a/scripts-enabled/120-CDB_StringToDate.sql b/scripts-enabled/120-CDB_StringToDate.sql new file mode 120000 index 0000000..89a4411 --- /dev/null +++ b/scripts-enabled/120-CDB_StringToDate.sql @@ -0,0 +1 @@ +../scripts-available/CDB_StringToDate.sql \ No newline at end of file diff --git a/scripts-enabled/130-CDB_TableMetadata.sql b/scripts-enabled/130-CDB_TableMetadata.sql new file mode 120000 index 0000000..507fda7 --- /dev/null +++ b/scripts-enabled/130-CDB_TableMetadata.sql @@ -0,0 +1 @@ +../scripts-available/CDB_TableMetadata.sql \ No newline at end of file diff --git a/scripts-enabled/140-CDB_TransformToWebmercator.sql b/scripts-enabled/140-CDB_TransformToWebmercator.sql new file mode 120000 index 0000000..85650af --- /dev/null +++ b/scripts-enabled/140-CDB_TransformToWebmercator.sql @@ -0,0 +1 @@ +../scripts-available/CDB_TransformToWebmercator.sql \ No newline at end of file diff --git a/scripts-enabled/150-CDB_UserTables.sql b/scripts-enabled/150-CDB_UserTables.sql new file mode 120000 index 0000000..2ef6405 --- /dev/null +++ b/scripts-enabled/150-CDB_UserTables.sql @@ -0,0 +1 @@ +../scripts-available/CDB_UserTables.sql \ No newline at end of file diff --git a/scripts-enabled/160-CDB_XYZ.sql b/scripts-enabled/160-CDB_XYZ.sql new file mode 120000 index 0000000..2b5d9d8 --- /dev/null +++ b/scripts-enabled/160-CDB_XYZ.sql @@ -0,0 +1 @@ +../scripts-available/CDB_XYZ.sql \ No newline at end of file diff --git a/scripts-enabled/170-CDB_ColumnNames.sql b/scripts-enabled/170-CDB_ColumnNames.sql new file mode 120000 index 0000000..3c3bf4f --- /dev/null +++ b/scripts-enabled/170-CDB_ColumnNames.sql @@ -0,0 +1 @@ +../scripts-available/CDB_ColumnNames.sql \ No newline at end of file diff --git a/scripts-enabled/180-CDB_ColumnType.sql b/scripts-enabled/180-CDB_ColumnType.sql new file mode 120000 index 0000000..4122629 --- /dev/null +++ b/scripts-enabled/180-CDB_ColumnType.sql @@ -0,0 +1 @@ +../scripts-available/CDB_ColumnType.sql \ No newline at end of file diff --git a/scripts-enabled/190-CDB_CartodbfyTable.sql b/scripts-enabled/190-CDB_CartodbfyTable.sql new file mode 120000 index 0000000..2385162 --- /dev/null +++ b/scripts-enabled/190-CDB_CartodbfyTable.sql @@ -0,0 +1 @@ +../scripts-available/CDB_CartodbfyTable.sql \ No newline at end of file diff --git a/scripts-enabled/CDB_TableIndexes.sql b/scripts-enabled/CDB_TableIndexes.sql new file mode 120000 index 0000000..1997db4 --- /dev/null +++ b/scripts-enabled/CDB_TableIndexes.sql @@ -0,0 +1 @@ +../scripts-available/CDB_TableIndexes.sql \ No newline at end of file