--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES -- Complain if script is sourced in psql, rather than via CREATE EXTENSION \echo Use "CREATE EXTENSION observatory" to load this file. \quit -- Version number of the extension release CREATE OR REPLACE FUNCTION cdb_observatory_version() RETURNS text AS $$ SELECT '1.0.2'::text; $$ language 'sql' STABLE STRICT; -- Internal identifier of the installed extension instence -- e.g. 'dev' for current development version CREATE OR REPLACE FUNCTION _cdb_observatory_internal_version() RETURNS text AS $$ SELECT installed_version FROM pg_available_extensions where name='observatory' and pg_available_extensions IS NOT NULL; $$ language 'sql' STABLE STRICT; CREATE OR REPLACE FUNCTION cdb_observatory._OBS_ConnectRemoteTable(fdw_name text, schema_name text, user_dbname text, user_hostname text, username text, user_tablename text, user_schema text) RETURNS void AS $$ DECLARE row record; option record; connection_str json; BEGIN -- Build connection string connection_str := '{"server":{"extensions":"postgis", "dbname":"' || user_dbname ||'", "host":"' || user_hostname ||'", "port":"5432"}, "users":{"public"' || ':{"user":"' || username ||'", "password":""} } }'; -- This function tries to be as idempotent as possible, by not creating anything more than once -- (not even using IF NOT EXIST to avoid throwing warnings) IF NOT EXISTS ( SELECT * FROM pg_extension WHERE extname = 'postgres_fdw') THEN CREATE EXTENSION postgres_fdw; END IF; -- Create FDW first if it does not exist IF NOT EXISTS ( SELECT * FROM pg_foreign_server WHERE srvname = fdw_name) THEN EXECUTE FORMAT('CREATE SERVER %I FOREIGN DATA WRAPPER postgres_fdw', fdw_name); END IF; -- Set FDW settings FOR row IN SELECT p.key, p.value from lateral json_each_text(connection_str->'server') p LOOP IF NOT EXISTS (WITH a AS (select split_part(unnest(srvoptions), '=', 1) as options from pg_foreign_server where srvname=fdw_name) SELECT * from a where options = row.key) THEN EXECUTE FORMAT('ALTER SERVER %I OPTIONS (ADD %I %L)', fdw_name, row.key, row.value); ELSE EXECUTE FORMAT('ALTER SERVER %I OPTIONS (SET %I %L)', fdw_name, row.key, row.value); END IF; END LOOP; -- Create user mappings FOR row IN SELECT p.key, p.value from lateral json_each(connection_str->'users') p LOOP -- Check if entry on pg_user_mappings exists IF NOT EXISTS ( SELECT * FROM pg_user_mappings WHERE srvname = fdw_name AND usename = row.key ) THEN EXECUTE FORMAT ('CREATE USER MAPPING FOR %I SERVER %I', row.key, fdw_name); END IF; -- Update user mapping settings FOR option IN SELECT o.key, o.value from lateral json_each_text(row.value) o LOOP IF NOT EXISTS (WITH a AS (select split_part(unnest(umoptions), '=', 1) as options from pg_user_mappings WHERE srvname = fdw_name AND usename = row.key) SELECT * from a where options = option.key) THEN EXECUTE FORMAT('ALTER USER MAPPING FOR %I SERVER %I OPTIONS (ADD %I %L)', row.key, fdw_name, option.key, option.value); ELSE EXECUTE FORMAT('ALTER USER MAPPING FOR %I SERVER %I OPTIONS (SET %I %L)', row.key, fdw_name, option.key, option.value); END IF; END LOOP; END LOOP; -- Create schema if it does not exist. IF NOT EXISTS ( SELECT * from pg_namespace WHERE nspname=fdw_name) THEN EXECUTE FORMAT ('CREATE SCHEMA %I', fdw_name); END IF; -- Bring the remote cdb_tablemetadata IF NOT EXISTS ( SELECT * FROM PG_CLASS WHERE relnamespace = (SELECT oid FROM pg_namespace WHERE nspname=fdw_name) and relname='cdb_tablemetadata') THEN EXECUTE FORMAT ('CREATE FOREIGN TABLE %I.cdb_tablemetadata (tabname text, updated_at timestamp with time zone) SERVER %I OPTIONS (table_name ''cdb_tablemetadata_text'', schema_name ''public'', updatable ''false'')', fdw_name, fdw_name); END IF; -- Import target table EXECUTE FORMAT ('IMPORT FOREIGN SCHEMA %I LIMIT TO (%I) from SERVER %I INTO %I', user_schema, user_tablename, fdw_name, schema_name); END; $$ LANGUAGE PLPGSQL; -- Returns the table name with geoms for the given geometry_id -- TODO probably needs to take in the column_id array to get the relevant -- table where there is multiple sources for a column from multiple -- geometries. CREATE OR REPLACE FUNCTION cdb_observatory._OBS_GeomTable( geom geometry(Geometry, 4326), geometry_id text, time_span text DEFAULT NULL ) RETURNS TEXT AS $$ DECLARE result text; BEGIN EXECUTE ' SELECT tablename FROM observatory.OBS_table WHERE id IN ( SELECT table_id FROM observatory.OBS_table tab, observatory.OBS_column_table coltable, observatory.OBS_column col WHERE type ILIKE ''geometry'' AND coltable.column_id = col.id AND coltable.table_id = tab.id AND col.id = $1 AND CASE WHEN $3::TEXT IS NOT NULL THEN timespan ILIKE $3::TEXT ELSE TRUE END ORDER BY timespan DESC LIMIT 1 ) ' USING geometry_id, geom, time_span INTO result; return result; END; $$ LANGUAGE plpgsql; -- A function that gets the column data for multiple columns -- Old: OBS_GetColumnData CREATE OR REPLACE FUNCTION cdb_observatory._OBS_GetColumnData( geometry_id text, column_ids text[], timespan text ) RETURNS SETOF JSON AS $$ BEGIN -- figure out highest-weight geometry_id/timespan pair for the first data column -- TODO this should be done for each data column separately IF geometry_id IS NULL OR timespan IS NULL THEN EXECUTE ' SELECT data_t.timespan timespan, geom_c.id boundary_id FROM observatory.obs_table data_t, observatory.obs_column_table data_ct, observatory.obs_column data_c, observatory.obs_column_table geoid_ct, observatory.obs_column_to_column c2c, observatory.obs_column geom_c WHERE data_c.id = $2 AND data_ct.column_id = data_c.id AND data_ct.table_id = data_t.id AND geoid_ct.table_id = data_t.id AND geoid_ct.column_id = c2c.source_id AND c2c.reltype = ''geom_ref'' AND geom_c.id = c2c.target_id AND CASE WHEN $3 IS NULL THEN True ELSE $3 = timespan END AND CASE WHEN $1 IS NULL THEN True ELSE $1 = geom_c.id END ORDER BY geom_c.weight DESC, data_t.timespan DESC LIMIT 1 ' INTO timespan, geometry_id USING geometry_id, (column_ids)[1], timespan; END IF; RETURN QUERY EXECUTE ' WITH geomref AS ( SELECT ct.table_id id FROM observatory.OBS_column_to_column c2c, observatory.OBS_column_table ct WHERE c2c.reltype = ''geom_ref'' AND c2c.target_id = $1 AND c2c.source_id = ct.column_id ), column_ids as ( select row_number() over () as no, a.column_id as column_id from (select unnest($2) as column_id) a ) SELECT row_to_json(a) from ( select colname, tablename, aggregate, name, type, c.description, $1 AS boundary_id FROM column_ids, observatory.OBS_column c, observatory.OBS_column_table ct, observatory.OBS_table t WHERE column_ids.column_id = c.id AND c.id = ct.column_id AND t.id = ct.table_id AND t.timespan = $3 AND t.id in (SELECT id FROM geomref) order by column_ids.no ) a ' USING geometry_id, column_ids, timespan RETURN; END; $$ LANGUAGE plpgsql; --Test point cause Stuart always seems to make random points in the water CREATE OR REPLACE FUNCTION cdb_observatory._TestPoint() RETURNS geometry(Point, 4326) AS $$ BEGIN -- new york city RETURN ST_SetSRID(ST_Point( -73.936669, 40.704512), 4326); END; $$ LANGUAGE plpgsql; --Test polygon cause Stuart always seems to make random points in the water -- TODO: remove as it's not used anywhere? CREATE OR REPLACE FUNCTION cdb_observatory._TestArea() RETURNS geometry(Geometry, 4326) AS $$ BEGIN -- Buffer NYC point by 500 meters RETURN ST_Buffer(cdb_observatory._TestPoint()::geography, 500)::geometry; END; $$ LANGUAGE plpgsql; --Used to expand a column based response to a table based one. Give it the desired --columns and it will return a partial query for rolling them out to a table. CREATE OR REPLACE FUNCTION cdb_observatory._OBS_BuildSnapshotQuery(names text[]) RETURNS TEXT AS $$ DECLARE q text; i numeric; BEGIN q := 'SELECT '; FOR i IN 1..array_upper(names,1) LOOP q = q || format(' vals[%s] As %I', i, names[i]); IF i < array_upper(names, 1) THEN q= q || ','; END IF; END LOOP; RETURN q; END; $$ LANGUAGE plpgsql; -- Function that replaces all non digits or letters with _ trims and lowercases the -- passed measure name CREATE OR REPLACE FUNCTION cdb_observatory._OBS_StandardizeMeasureName(measure_name text) RETURNS text AS $$ DECLARE result text; BEGIN -- Turn non letter or digits to _ result = regexp_replace(measure_name, '[^\dA-Za-z]+','_', 'g'); -- Remove duplicate _'s result = regexp_replace(result,'_{2,}','_', 'g'); -- Trim _'s from beginning and end result = trim(both '_' from result); result = lower(result); RETURN result; END; $$ LANGUAGE plpgsql; -- Function that returns the currently deployed obs_dump_version from the -- remote table of the same name. CREATE OR REPLACE FUNCTION cdb_observatory.OBS_DumpVersion( ) RETURNS TEXT AS $$ DECLARE result text; BEGIN EXECUTE ' SELECT MAX(dump_id) FROM observatory.obs_dump_version ' INTO result; RETURN result; END; $$ LANGUAGE plpgsql; --For Longer term Dev --Break out table definitions to types --Automate type creation from a script, something like ----CREATE OR REPLACE FUNCTION OBS_Get<%=tag_name%>(geom GEOMETRY) ----RETURNS TABLE( ----<%=get_dimensions_for_tag(tag_name)%> ----AS $$ ----DECLARE ----target_cols text[]; ----names text[]; ----vals NUMERIC[];- ----q text; ----BEGIN ----target_cols := Array[<%=get_dimensions_for_tag(tag_name)%>], --Functions for augmenting specific tables -------------------------------------------------------------------------------- -- Creates a table of demographic snapshot CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetDemographicSnapshot(geom geometry(Geometry, 4326), time_span text DEFAULT NULL, boundary_id text DEFAULT NULL) RETURNS SETOF JSON AS $$ DECLARE target_cols text[]; BEGIN IF time_span IS NULL THEN time_span = '2010 - 2014'; END IF; IF boundary_id IS NULL THEN boundary_id = 'us.census.tiger.block_group'; END IF; target_cols := Array['us.census.acs.B01003001', 'us.census.acs.B01001002', 'us.census.acs.B01001026', 'us.census.acs.B01002001', 'us.census.acs.B03002003', 'us.census.acs.B03002004', 'us.census.acs.B03002006', 'us.census.acs.B03002012', 'us.census.acs.B03002005', 'us.census.acs.B03002008', 'us.census.acs.B03002009', 'us.census.acs.B03002002', --'not_us_citizen_pop', --'workers_16_and_over', --'commuters_by_car_truck_van', --'commuters_drove_alone', --'commuters_by_carpool', --'commuters_by_public_transportation', --'commuters_by_bus', --'commuters_by_subway_or_elevated', --'walked_to_work', --'worked_at_home', --'children', 'us.census.acs.B11001001', --'population_3_years_over', --'in_school', --'in_grades_1_to_4', --'in_grades_5_to_8', --'in_grades_9_to_12', --'in_undergrad_college', 'us.census.acs.B15003001', 'us.census.acs.B15003017', 'us.census.acs.B15003019', 'us.census.acs.B15003020', 'us.census.acs.B15003021', 'us.census.acs.B15003022', 'us.census.acs.B15003023', --'pop_5_years_over', --'speak_only_english_at_home', --'speak_spanish_at_home', --'pop_determined_poverty_status', --'poverty', 'us.census.acs.B19013001', 'us.census.acs.B19083001', 'us.census.acs.B19301001', 'us.census.acs.B25001001', 'us.census.acs.B25002003', 'us.census.acs.B25004002', 'us.census.acs.B25004004', 'us.census.acs.B25058001', 'us.census.acs.B25071001', 'us.census.acs.B25075001', 'us.census.acs.B25075025', 'us.census.acs.B25081002', --'pop_15_and_over', --'pop_never_married', --'pop_now_married', --'pop_separated', --'pop_widowed', --'pop_divorced', 'us.census.acs.B08134001', 'us.census.acs.B08134002', 'us.census.acs.B08134003', 'us.census.acs.B08134004', 'us.census.acs.B08134005', 'us.census.acs.B08134006', 'us.census.acs.B08134007', 'us.census.acs.B08134008', 'us.census.acs.B08134009', 'us.census.acs.B08134010', 'us.census.acs.B08135001', 'us.census.acs.B19001002', 'us.census.acs.B19001003', 'us.census.acs.B19001004', 'us.census.acs.B19001005', 'us.census.acs.B19001006', 'us.census.acs.B19001007', 'us.census.acs.B19001008', 'us.census.acs.B19001009', 'us.census.acs.B19001010', 'us.census.acs.B19001011', 'us.census.acs.B19001012', 'us.census.acs.B19001013', 'us.census.acs.B19001014', 'us.census.acs.B19001015', 'us.census.acs.B19001016', 'us.census.acs.B19001017']; RETURN QUERY EXECUTE 'select * from cdb_observatory._OBS_Get($1, $2, $3, $4 )' USING geom, target_cols, time_span, boundary_id RETURN; END; $$ LANGUAGE plpgsql; --Base functions for performing augmentation ---------------------------------------------------------------------------------------- -- Base augmentation fucntion. CREATE OR REPLACE FUNCTION cdb_observatory._OBS_Get( geom geometry(Geometry, 4326), column_ids text[], time_span text, geometry_level text ) RETURNS SETOF JSON AS $$ DECLARE results json[]; geom_table_name text; names text[]; query text; data_table_info json[]; BEGIN EXECUTE 'SELECT array_agg(_obs_getcolumndata) FROM cdb_observatory._OBS_GetColumnData($1, $2, $3);' INTO data_table_info USING geometry_level, column_ids, time_span; IF geometry_level IS NULL THEN geometry_level = data_table_info[1]->>'boundary_id'; END IF; geom_table_name := cdb_observatory._OBS_GeomTable(geom, geometry_level); IF geom_table_name IS NULL THEN --raise notice 'Point % is outside of the data region', ST_AsText(geom); -- TODO this should return JSON RETURN QUERY SELECT '{}'::json; RETURN; END IF; IF data_table_info IS NULL THEN --raise notice 'Cannot find data table for boundary ID %, column_ids %, and time_span %', geometry_level, column_ids, time_span; END IF; IF ST_GeometryType(geom) = 'ST_Point' THEN --raise notice 'geom_table_name %, data_table_info %', geom_table_name, data_table_info::json[]; results := cdb_observatory._OBS_GetPoints(geom, geom_table_name, data_table_info); ELSIF ST_GeometryType(geom) IN ('ST_Polygon', 'ST_MultiPolygon') THEN results := cdb_observatory._OBS_GetPolygons(geom, geom_table_name, data_table_info); END IF; RETURN QUERY EXECUTE $query$ SELECT unnest($1) $query$ USING results; RETURN; END; $$ LANGUAGE plpgsql; -- If the variable of interest is just a rate return it as such, -- otherwise normalize it to the census block area and return that CREATE OR REPLACE FUNCTION cdb_observatory._OBS_GetPoints( geom geometry(Geometry, 4326), geom_table_name text, -- TODO: change to boundary_id data_table_info json[] ) RETURNS json[] AS $$ DECLARE result NUMERIC[]; json_result json[]; query text; i int; geoid text; data_geoid_colname text; geom_geoid_colname text; area NUMERIC; BEGIN -- TODO we're assuming our geom_table has only one geom_ref column -- we *really* should pass in both geom_table_name and boundary_id -- TODO tablename should not be passed here (use boundary_id) EXECUTE format('SELECT ct.colname FROM observatory.obs_column_to_column c2c, observatory.obs_column_table ct, observatory.obs_table t WHERE c2c.reltype = ''geom_ref'' AND ct.column_id = c2c.source_id AND ct.table_id = t.id AND t.tablename = %L' , (data_table_info)[1]->>'tablename') INTO data_geoid_colname; EXECUTE format('SELECT ct.colname FROM observatory.obs_column_to_column c2c, observatory.obs_column_table ct, observatory.obs_table t WHERE c2c.reltype = ''geom_ref'' AND ct.column_id = c2c.source_id AND ct.table_id = t.id AND t.tablename = %L' , geom_table_name) INTO geom_geoid_colname; EXECUTE format('SELECT %I FROM observatory.%I WHERE ST_Within($1, the_geom)', geom_geoid_colname, geom_table_name) USING geom INTO geoid; --raise notice 'geoid is %, geometry table is % ', geoid, geom_table_name; EXECUTE format('SELECT ST_Area(the_geom::geography) / (1000 * 1000) FROM observatory.%I WHERE %I = %L', geom_table_name, geom_geoid_colname, geoid) INTO area; IF area IS NULL THEN --raise notice 'No geometry at %', ST_AsText(geom); END IF; query := 'SELECT Array['; FOR i IN 1..array_upper(data_table_info, 1) LOOP IF area is NULL OR area = 0 THEN -- give back null values query := query || format('NULL::numeric '); ELSIF ((data_table_info)[i])->>'aggregate' != 'sum' THEN -- give back full variable query := query || format('%I ', ((data_table_info)[i])->>'colname'); ELSE -- give back variable normalized by area of geography query := query || format('%I/%s ', ((data_table_info)[i])->>'colname', area); END IF; IF i < array_upper(data_table_info, 1) THEN query := query || ','; END IF; END LOOP; query := query || format(' ]::numeric[] FROM observatory.%I WHERE %I.%I = %L ', ((data_table_info)[1])->>'tablename', ((data_table_info)[1])->>'tablename', data_geoid_colname, geoid ); EXECUTE query INTO result USING geom; EXECUTE $query$ SELECT array_agg(row_to_json(t)) FROM ( SELECT values As value, meta->>'name' As name, meta->>'tablename' As tablename, meta->>'aggregate' As aggregate, meta->>'type' As type, meta->>'description' As description FROM (SELECT unnest($1) As values, unnest($2) As meta) b ) t $query$ INTO json_result USING result, data_table_info; RETURN json_result; END; $$ LANGUAGE plpgsql; CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetMeasure( geom geometry(Geometry, 4326), measure_id TEXT, normalize TEXT DEFAULT NULL, boundary_id TEXT DEFAULT NULL, time_span TEXT DEFAULT NULL ) RETURNS NUMERIC AS $$ DECLARE geom_type TEXT; map_type TEXT; numer_aggregate TEXT; numer_colname TEXT; numer_geomref_colname TEXT; numer_tablename TEXT; denom_colname TEXT; denom_geomref_colname TEXT; denom_tablename TEXT; geom_colname TEXT; geom_geomref_colname TEXT; geom_tablename TEXT; result NUMERIC; sql TEXT; numer_name TEXT; BEGIN EXECUTE $query$ SELECT numer_aggregate, numer_colname, numer_geomref_colname, numer_tablename, denom_colname, denom_geomref_colname, denom_tablename, geom_colname, geom_geomref_colname, geom_tablename, numer_name FROM observatory.obs_meta WHERE (geom_id = $1 OR ($1 = '')) AND numer_id = $2 AND (numer_timespan = $3 OR ($3 = '')) ORDER BY geom_weight DESC, numer_timespan DESC LIMIT 1 $query$ INTO numer_aggregate, numer_colname, numer_geomref_colname, numer_tablename, denom_colname, denom_geomref_colname, denom_tablename, geom_colname, geom_geomref_colname, geom_tablename, numer_name USING COALESCE(boundary_id, ''), measure_id, COALESCE(time_span, ''); IF ST_GeometryType(geom) = 'ST_Point' THEN geom_type := 'point'; ELSIF ST_GeometryType(geom) IN ('ST_Polygon', 'ST_MultiPolygon') THEN geom_type := 'polygon'; ELSE RAISE EXCEPTION 'Invalid geometry type (%), can only handle ''ST_Point'', ''ST_Polygon'', and ''ST_MultiPolygon''', ST_GeometryType(geom); END IF; IF normalize ILIKE 'area' AND numer_aggregate ILIKE 'sum' THEN map_type := 'areaNormalized'; ELSIF normalize ILIKE 'denominator' THEN map_type := 'denominated'; ELSE -- defaults: area normalization for point if it's possible and none for -- polygon or non-summable point IF geom_type = 'point' AND numer_aggregate ILIKE 'sum' THEN map_type := 'areaNormalized'; ELSE map_type := 'predenominated'; END IF; END IF; IF geom_type = 'point' THEN IF map_type = 'areaNormalized' THEN sql = format('WITH _geom AS (SELECT ST_Area(geom.%I::Geography) / 1000000 area, geom.%I geom_ref FROM observatory.%I geom WHERE ST_Within(%L, geom.%I) LIMIT 1) SELECT numer.%I / (SELECT area FROM _geom) FROM observatory.%I numer WHERE numer.%I = (SELECT geom_ref FROM _geom)', geom_colname, geom_geomref_colname, geom_tablename, geom, geom_colname, numer_colname, numer_tablename, numer_geomref_colname); ELSIF map_type = 'denominated' THEN sql = format('SELECT numer.%I / NULLIF((SELECT denom.%I FROM observatory.%I denom WHERE denom.%I = numer.%I LIMIT 1), 0) FROM observatory.%I numer WHERE numer.%I = (SELECT geom.%I FROM observatory.%I geom WHERE ST_Within(%L, geom.%I) LIMIT 1)', numer_colname, denom_colname, denom_tablename, denom_geomref_colname, numer_geomref_colname, numer_tablename, numer_geomref_colname, geom_geomref_colname, geom_tablename, geom, geom_colname); ELSIF map_type = 'predenominated' THEN sql = format('SELECT numer.%I FROM observatory.%I numer WHERE numer.%I = (SELECT geom.%I FROM observatory.%I geom WHERE ST_Within(%L, geom.%I) LIMIT 1)', numer_colname, numer_tablename, numer_geomref_colname, geom_geomref_colname, geom_tablename, geom, geom_colname); END IF; ELSIF geom_type = 'polygon' THEN IF map_type = 'areaNormalized' THEN sql = format('WITH _geom AS (SELECT ST_Area(ST_Intersection(%L, geom.%I)) / ST_Area(geom.%I) overlap, geom.%I geom_ref FROM observatory.%I geom WHERE ST_Intersects(%L, geom.%I) AND ST_Area(ST_Intersection(%L, geom.%I)) / ST_Area(geom.%I) > 0) SELECT SUM(numer.%I * (SELECT _geom.overlap FROM _geom WHERE _geom.geom_ref = numer.%I)) / ST_Area(%L::Geography) FROM observatory.%I numer WHERE numer.%I = ANY ((SELECT ARRAY_AGG(geom_ref) FROM _geom)::TEXT[])', geom, geom_colname, geom_colname, geom_geomref_colname, geom_tablename, geom, geom_colname, geom, geom_colname, geom_colname, numer_colname, numer_geomref_colname, geom, numer_tablename, numer_geomref_colname); ELSIF map_type = 'denominated' THEN sql = format('WITH _geom AS (SELECT ST_Area(ST_Intersection(%L, geom.%I)) / ST_Area(geom.%I) overlap, geom.%I geom_ref FROM observatory.%I geom WHERE ST_Intersects(%L, geom.%I) AND ST_Area(ST_Intersection(%L, geom.%I)) / ST_Area(geom.%I) > 0), _denom AS (SELECT denom.%I, denom.%I geom_ref FROM observatory.%I denom WHERE denom.%I = ANY ((SELECT ARRAY_AGG(geom_ref) FROM _geom)::TEXT[])) SELECT SUM(numer.%I * (SELECT _geom.overlap FROM _geom WHERE _geom.geom_ref = numer.%I)) / SUM((SELECT _denom.%I * (SELECT _geom.overlap FROM _geom WHERE _geom.geom_ref = _denom.geom_ref) FROM _denom WHERE _denom.geom_ref = numer.%I)) FROM observatory.%I numer WHERE numer.%I = ANY ((SELECT ARRAY_AGG(geom_ref) FROM _geom)::TEXT[])', geom, geom_colname, geom_colname, geom_geomref_colname, geom_tablename, geom, geom_colname, geom, geom_colname, geom_colname, denom_colname, denom_geomref_colname, denom_tablename, denom_geomref_colname, numer_colname, numer_geomref_colname, denom_colname, numer_geomref_colname, numer_tablename, numer_geomref_colname); ELSIF map_type = 'predenominated' THEN IF numer_aggregate NOT ILIKE 'sum' THEN RAISE EXCEPTION 'Cannot calculate "%" (%) for custom area as it cannot be summed, use ST_PointOnSurface instead', numer_name, numer_id; ELSE sql = format('WITH _geom AS (SELECT ST_Area(ST_Intersection(%L, geom.%I)) / ST_Area(geom.%I) overlap, geom.%I geom_ref FROM observatory.%I geom WHERE ST_Intersects(%L, geom.%I) AND ST_Area(ST_Intersection(%L, geom.%I)) / ST_Area(geom.%I) > 0) SELECT SUM(numer.%I * (SELECT _geom.overlap FROM _geom WHERE _geom.geom_ref = numer.%I)) FROM observatory.%I numer WHERE numer.%I = ANY ((SELECT ARRAY_AGG(geom_ref) FROM _geom)::TEXT[])', geom, geom_colname, geom_colname, geom_geomref_colname, geom_tablename, geom, geom_colname, geom, geom_colname, geom_colname, numer_colname, numer_geomref_colname, numer_tablename, numer_geomref_colname); END IF; END IF; END IF; EXECUTE sql INTO result; RETURN result; END; $$ LANGUAGE plpgsql; CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetMeasureById( geom_ref TEXT, measure_id TEXT, boundary_id TEXT, time_span TEXT DEFAULT NULL ) RETURNS NUMERIC AS $$ DECLARE target_table TEXT; colname TEXT; measure_val NUMERIC; data_geoid_colname TEXT; BEGIN EXECUTE $query$ SELECT numer_colname, numer_geomref_colname, numer_tablename FROM observatory.obs_meta WHERE (geom_id = $1 OR ($1 = '')) AND numer_id = $2 AND (numer_timespan = $3 OR ($3 = '')) ORDER BY geom_weight DESC, numer_timespan DESC LIMIT 1 $query$ INTO colname, data_geoid_colname, target_table USING COALESCE(boundary_id, ''), measure_id, COALESCE(time_span, ''); --RAISE DEBUG 'target_table %, colname %', target_table, colname; EXECUTE format( 'SELECT %I FROM observatory.%I data WHERE data.%I = %L', colname, target_table, data_geoid_colname, geom_ref) INTO measure_val; RETURN measure_val; END; $$ LANGUAGE plpgsql; CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetCategory( geom geometry(Geometry, 4326), category_id TEXT, boundary_id TEXT DEFAULT NULL, time_span TEXT DEFAULT NULL ) RETURNS TEXT AS $$ DECLARE data_table TEXT; geom_table TEXT; colname TEXT; data_geomref_colname TEXT; geom_geomref_colname TEXT; geom_colname TEXT; category_val TEXT; category_share NUMERIC; BEGIN EXECUTE $query$ SELECT numer_colname, numer_geomref_colname, numer_tablename, geom_geomref_colname, geom_colname, geom_tablename FROM observatory.obs_meta WHERE (geom_id = $1 OR ($1 = '')) AND numer_id = $2 AND (numer_timespan = $3 OR ($3 = '')) ORDER BY geom_weight DESC, numer_timespan DESC LIMIT 1 $query$ INTO colname, data_geomref_colname, data_table, geom_geomref_colname, geom_colname, geom_table USING COALESCE(boundary_id, ''), category_id, COALESCE(time_span, ''); IF ST_GeometryType(geom) = 'ST_Point' THEN EXECUTE format( 'SELECT data.%I FROM observatory.%I data, observatory.%I geom WHERE data.%I = geom.%I AND ST_WITHIN(%L, geom.%I) ', colname, data_table, geom_table, data_geomref_colname, geom_geomref_colname, geom, geom_colname) INTO category_val; ELSE -- favor the category with the most area EXECUTE format( 'SELECT data.%I category, SUM(overlap_fraction) category_share FROM observatory.%I data, ( SELECT ST_Area( ST_Intersection(%L, a.%I) ) / ST_Area(%L) AS overlap_fraction, a.%I geomref FROM observatory.%I as a WHERE %L && a.%I) _overlaps WHERE data.%I = _overlaps.geomref GROUP BY category ORDER BY SUM(overlap_fraction) DESC LIMIT 1', colname, data_table, geom, geom_colname, geom, geom_geomref_colname, geom_table, geom, geom_colname, data_geomref_colname) INTO category_val, category_share; END IF; RETURN category_val; END; $$ LANGUAGE plpgsql; CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetUSCensusMeasure( geom geometry(Geometry, 4326), name TEXT, normalize TEXT DEFAULT NULL, boundary_id TEXT DEFAULT NULL, time_span TEXT DEFAULT NULL ) RETURNS NUMERIC AS $$ DECLARE standardized_name text; measure_id text; result NUMERIC; BEGIN standardized_name = cdb_observatory._OBS_StandardizeMeasureName(name); EXECUTE $string$ SELECT c.id FROM observatory.obs_column c JOIN observatory.obs_column_tag ct ON c.id = ct.column_id WHERE cdb_observatory._OBS_StandardizeMeasureName(c.name) = $1 AND ct.tag_id ILIKE 'us.census%' $string$ INTO measure_id USING standardized_name; EXECUTE 'SELECT cdb_observatory.OBS_GetMeasure($1, $2, $3, $4, $5)' INTO result USING geom, measure_id, normalize, boundary_id, time_span; RETURN result; END; $$ LANGUAGE plpgsql; CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetUSCensusCategory( geom geometry(Geometry, 4326), name TEXT, boundary_id TEXT DEFAULT NULL, time_span TEXT DEFAULT NULL ) RETURNS TEXT AS $$ DECLARE standardized_name text; category_id text; result TEXT; BEGIN standardized_name = cdb_observatory._OBS_StandardizeMeasureName(name); EXECUTE $string$ SELECT c.id FROM observatory.obs_column c --JOIN observatory.obs_column_tag ct -- ON c.id = ct.column_id WHERE cdb_observatory._OBS_StandardizeMeasureName(c.name) = $1 AND c.type ILIKE 'TEXT' AND c.id ILIKE 'us.census%' -- TODO this should be done by tag --AND ct.tag_id = 'us.census.acs.demographics' $string$ INTO category_id USING standardized_name; EXECUTE 'SELECT cdb_observatory.OBS_GetCategory($1, $2, $3, $4)' INTO result USING geom, category_id, boundary_id, time_span; RETURN result; END; $$ LANGUAGE plpgsql; CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetPopulation( geom geometry(Geometry, 4326), normalize TEXT DEFAULT NULL, boundary_id TEXT DEFAULT NULL, time_span TEXT DEFAULT NULL ) RETURNS NUMERIC AS $$ DECLARE population_measure_id TEXT; result NUMERIC; BEGIN -- TODO use a super-column for global pop population_measure_id := 'us.census.acs.B01003001'; EXECUTE format('SELECT cdb_observatory.OBS_GetMeasure( %L, %L, %L, %L, %L ) LIMIT 1', geom, population_measure_id, normalize, boundary_id, time_span) INTO result; return result; END; $$ LANGUAGE plpgsql; CREATE OR REPLACE FUNCTION cdb_observatory._OBS_GetPolygons( geom geometry(Geometry, 4326), geom_table_name text, data_table_info json[] ) RETURNS json[] AS $$ DECLARE result numeric[]; json_result json[]; q_select text; q_sum text; q text; i NUMERIC; data_geoid_colname text; geom_geoid_colname text; BEGIN -- TODO we're assuming our geom_table has only one geom_ref column -- we *really* should pass in both geom_table_name and boundary_id -- TODO tablename should not be passed here (use boundary_id) EXECUTE format('SELECT ct.colname FROM observatory.obs_column_to_column c2c, observatory.obs_column_table ct, observatory.obs_table t WHERE c2c.reltype = ''geom_ref'' AND ct.column_id = c2c.source_id AND ct.table_id = t.id AND t.tablename = %L' , (data_table_info)[1]->>'tablename') INTO data_geoid_colname; EXECUTE format('SELECT ct.colname FROM observatory.obs_column_to_column c2c, observatory.obs_column_table ct, observatory.obs_table t WHERE c2c.reltype = ''geom_ref'' AND ct.column_id = c2c.source_id AND ct.table_id = t.id AND t.tablename = %L' , geom_table_name) INTO geom_geoid_colname; q_select := format('SELECT %I, ', data_geoid_colname); q_sum := 'SELECT Array['; FOR i IN 1..array_upper(data_table_info, 1) LOOP q_select := q_select || format( '%I ', ((data_table_info)[i])->>'colname'); IF ((data_table_info)[i])->>'aggregate' ='sum' THEN q_sum := q_sum || format('sum(overlap_fraction * COALESCE(%I, 0)) ',((data_table_info)[i])->>'colname',((data_table_info)[i])->>'colname'); ELSE q_sum := q_sum || ' NULL::numeric '; END IF; IF i < array_upper(data_table_info,1) THEN q_select := q_select || format(','); q_sum := q_sum || format(','); END IF; END LOOP; q := format(' WITH _overlaps As ( SELECT ST_Area( ST_Intersection($1, a.the_geom) ) / ST_Area(a.the_geom) As overlap_fraction, %I FROM observatory.%I As a WHERE $1 && a.the_geom ), values As ( ', geom_geoid_colname, geom_table_name); q := q || q_select || format('FROM observatory.%I ', ((data_table_info)[1]->>'tablename')); q := format(q || ' ) ' || q_sum || ' ]::numeric[] FROM _overlaps, values WHERE values.%I = _overlaps.%I', data_geoid_colname, geom_geoid_colname); EXECUTE q INTO result USING geom; EXECUTE $query$ SELECT array_agg(row_to_json(t)) FROM ( SELECT values As value, meta->>'name' As name, meta->>'tablename' As tablename, meta->>'aggregate' As aggregate, meta->>'type' As type, meta->>'description' As description FROM (SELECT unnest($1) As values, unnest($2) As meta) b ) t $query$ INTO json_result USING result, data_table_info; RETURN json_result; END; $$ LANGUAGE plpgsql; CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetSegmentSnapshot( geom geometry(Geometry, 4326), boundary_id text DEFAULT NULL ) RETURNS JSON AS $$ DECLARE target_cols text[]; result json; seg_name Text; geom_id Text; q Text; segment_names Text[]; BEGIN IF boundary_id IS NULL THEN boundary_id = 'us.census.tiger.census_tract'; END IF; target_cols := Array[ 'us.census.acs.B01003001_quantile', 'us.census.acs.B01001002_quantile', 'us.census.acs.B01001026_quantile', 'us.census.acs.B01002001_quantile', 'us.census.acs.B03002003_quantile', 'us.census.acs.B03002004_quantile', 'us.census.acs.B03002006_quantile', 'us.census.acs.B03002012_quantile', 'us.census.acs.B05001006_quantile',-- 'us.census.acs.B08006001_quantile',-- 'us.census.acs.B08006002_quantile',-- 'us.census.acs.B08006008_quantile',-- 'us.census.acs.B08006009_quantile',-- 'us.census.acs.B08006011_quantile',-- 'us.census.acs.B08006015_quantile',-- 'us.census.acs.B08006017_quantile',-- 'us.census.acs.B09001001_quantile',-- 'us.census.acs.B11001001_quantile', 'us.census.acs.B14001001_quantile',-- 'us.census.acs.B14001002_quantile',-- 'us.census.acs.B14001005_quantile',-- 'us.census.acs.B14001006_quantile',-- 'us.census.acs.B14001007_quantile',-- 'us.census.acs.B14001008_quantile',-- 'us.census.acs.B15003001_quantile', 'us.census.acs.B15003017_quantile', 'us.census.acs.B15003022_quantile', 'us.census.acs.B15003023_quantile', 'us.census.acs.B16001001_quantile',-- 'us.census.acs.B16001002_quantile',-- 'us.census.acs.B16001003_quantile',-- 'us.census.acs.B17001001_quantile',-- 'us.census.acs.B17001002_quantile',-- 'us.census.acs.B19013001_quantile', 'us.census.acs.B19083001_quantile', 'us.census.acs.B19301001_quantile', 'us.census.acs.B25001001_quantile', 'us.census.acs.B25002003_quantile', 'us.census.acs.B25004002_quantile', 'us.census.acs.B25004004_quantile', 'us.census.acs.B25058001_quantile', 'us.census.acs.B25071001_quantile', 'us.census.acs.B25075001_quantile', 'us.census.acs.B25075025_quantile' ]; EXECUTE $query$ SELECT array_agg(_OBS_GetCategories->>'category') FROM cdb_observatory._OBS_GetCategories( $1, Array['us.census.spielman_singleton_segments.X10', 'us.census.spielman_singleton_segments.X55'], $2) $query$ INTO segment_names USING geom, boundary_id; q := format($query$ WITH a As ( SELECT array_agg(_OBS_GET->>'name') As names, array_agg(_OBS_GET->>'value') As vals FROM cdb_observatory._OBS_Get($1, $2, '2010 - 2014', $3) ), percentiles As ( %s FROM a) SELECT row_to_json(r) FROM ( SELECT $4 as x10_segment, $5 as x55_segment, percentiles.* FROM percentiles) r $query$, cdb_observatory._OBS_BuildSnapshotQuery(target_cols)) results; EXECUTE q into result USING geom, target_cols, boundary_id, segment_names[1], segment_names[2]; return result; END; $$ LANGUAGE plpgsql; --Get categorical variables from point CREATE OR REPLACE FUNCTION cdb_observatory._OBS_GetCategories( geom geometry(Geometry, 4326), dimension_names text[], boundary_id text DEFAULT NULL, time_span text DEFAULT NULL ) RETURNS SETOF JSON as $$ DECLARE geom_table_name text; geoid text; names text[]; results text[]; query text; data_table_info json[]; BEGIN IF time_span IS NULL THEN time_span = '2010 - 2014'; END IF; IF boundary_id IS NULL THEN boundary_id = 'us.census.tiger.block_group'; END IF; geom_table_name := cdb_observatory._OBS_GeomTable(geom, boundary_id); IF geom_table_name IS NULL THEN --raise notice 'Point % is outside of the data region', ST_AsText(geom); RETURN QUERY SELECT '{}'::text[], '{}'::text[]; RETURN; END IF; EXECUTE ' SELECT array_agg(_obs_getcolumndata) FROM cdb_observatory._OBS_GetColumnData($1, $2, $3); ' INTO data_table_info USING boundary_id, dimension_names, time_span; IF data_table_info IS NULL THEN --raise notice 'No data table found for this location'; RETURN QUERY SELECT NULL::json; RETURN; END IF; EXECUTE format('SELECT geoid FROM observatory.%I WHERE the_geom && $1', geom_table_name) USING geom INTO geoid; IF geoid IS NULL THEN --raise notice 'No geometry id for this location'; RETURN QUERY SELECT NULL::json; RETURN; END IF; query := 'SELECT ARRAY['; FOR i IN 1..array_upper(data_table_info, 1) LOOP query = query || format('%I ', lower(((data_table_info)[i])->>'colname')); IF i < array_upper(data_table_info, 1) THEN query := query || ','; END IF; END LOOP; query := query || format(' ]::text[] FROM observatory.%I WHERE %I.geoid = %L ', ((data_table_info)[1])->>'tablename', ((data_table_info)[1])->>'tablename', geoid ); EXECUTE query INTO results USING geom; RETURN QUERY EXECUTE $query$ SELECT row_to_json(t) FROM ( SELECT categories As category, meta->>'name' As name, meta->>'tablename' As tablename, meta->>'aggregate' As aggregate, meta->>'type' As type, meta->>'description' As description FROM (SELECT unnest($1) As categories, unnest($2) As meta) As b ) t $query$ USING results, data_table_info; RETURN; END; $$ LANGUAGE plpgsql; -- return a table that contains a string match based on input -- TODO: implement search for timespan CREATE OR REPLACE FUNCTION cdb_observatory._OBS_SearchTables( search_term text, time_span text DEFAULT NULL ) RETURNS table(tablename text, timespan text) As $$ DECLARE out_var text[]; BEGIN IF time_span IS NULL THEN RETURN QUERY EXECUTE 'SELECT tablename::text, timespan::text FROM observatory.obs_table t JOIN observatory.obs_column_table ct ON ct.table_id = t.id JOIN observatory.obs_column c ON ct.column_id = c.id WHERE c.type ILIKE ''geometry'' AND c.id = $1' USING search_term; RETURN; ELSE RETURN QUERY EXECUTE 'SELECT tablename::text, timespan::text FROM observatory.obs_table t JOIN observatory.obs_column_table ct ON ct.table_id = t.id JOIN observatory.obs_column c ON ct.column_id = c.id WHERE c.type ILIKE ''geometry'' AND c.id = $1 AND t.timespan = $2' USING search_term, time_span; RETURN; END IF; END; $$ LANGUAGE plpgsql IMMUTABLE; -- Functions used to search the observatory for measures -------------------------------------------------------------------------------- -- TODO allow the user to specify the boundary to search for measures -- CREATE OR REPLACE FUNCTION cdb_observatory.OBS_Search( search_term text, relevant_boundary text DEFAULT null ) RETURNS TABLE(id text, description text, name text, aggregate text, source text) as $$ DECLARE boundary_term text; BEGIN IF relevant_boundary then boundary_term = ''; else boundary_term = ''; END IF; RETURN QUERY EXECUTE format($string$ SELECT id::text, description::text, name::text, aggregate::text, NULL::TEXT source -- TODO use tags FROM observatory.OBS_column where name ilike '%%' || %L || '%%' or description ilike '%%' || %L || '%%' %s $string$, search_term, search_term,boundary_term); RETURN; END $$ LANGUAGE plpgsql; -- Functions to return the geometry levels that a point is part of -------------------------------------------------------------------------------- -- TODO add test response CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetAvailableBoundaries( geom geometry(Geometry, 4326), timespan text DEFAULT null) RETURNS TABLE(boundary_id text, description text, time_span text, tablename text) as $$ DECLARE timespan_query TEXT DEFAULT ''; BEGIN IF timespan != NULL THEN timespan_query = format('AND timespan = %L', timespan); END IF; RETURN QUERY EXECUTE $string$ SELECT column_id::text As column_id, obs_column.description::text As description, timespan::text As timespan, tablename::text As tablename FROM observatory.OBS_table, observatory.OBS_column_table, observatory.OBS_column WHERE observatory.OBS_column_table.column_id = observatory.obs_column.id AND observatory.OBS_column_table.table_id = observatory.obs_table.id AND observatory.OBS_column.type = 'Geometry' AND ST_Intersects($1, st_setsrid(observatory.obs_table.the_geom, 4326)) $string$ || timespan_query USING geom; RETURN; END $$ LANGUAGE plpgsql; -- Data Observatory -- Welcome to the Future -- These Data Observatory functions provide access to boundary polyons (and -- their ids) such as those available through the US Census Tiger, Who's on -- First, the Spanish Census, and so on -- OBS_GetBoundary -- -- Returns the boundary polygon(s) that overlap with the input point geometry. -- From an input point geometry, find the boundary which intersects with the -- centroid of the input geometry -- Inputs: -- geom geometry: input point geometry -- boundary_id text: source id of boundaries -- see function OBS_ListGeomColumns for all avaiable -- boundary ids -- time_span text: time span that the geometries were collected (optional) -- -- Output: -- boundary geometry: geometry boundary that intersects with geom, is at the -- resolution requested with boundary_id, and time_span -- CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetBoundary( geom geometry(Point, 4326), boundary_id text, time_span text DEFAULT NULL) RETURNS geometry(Geometry, 4326) AS $$ DECLARE boundary geometry(Geometry, 4326); target_table text; BEGIN -- TODO: Check if SRID = 4326, if not transform? -- if not a point, raise error IF ST_GeometryType(geom) != 'ST_Point' THEN RAISE EXCEPTION 'Invalid geometry type (%), expecting ''ST_Point''', ST_GeometryType(geom); END IF; -- choose appropriate table based on time_span IF time_span IS NULL THEN SELECT x.target_tables INTO target_table FROM cdb_observatory._OBS_SearchTables(boundary_id, time_span) As x(target_tables, timespans) ORDER BY x.timespans DESC LIMIT 1; ELSE -- TODO: modify for only one table returned instead of arbitrarily choosing -- one with LIMIT 1 (could be conflict between clipped vs non-clipped -- boundaries in the metadata tables) SELECT x.target_tables INTO target_table FROM cdb_observatory._OBS_SearchTables(boundary_id, time_span) As x(target_tables, timespans) WHERE x.timespans = time_span LIMIT 1; END IF; -- if no tables are found, raise notice and return null IF target_table IS NULL THEN --RAISE NOTICE 'No boundaries found for ''%'' in ''%''', ST_AsText(geom), boundary_id; RETURN NULL::geometry; END IF; --RAISE NOTICE 'target_table: %', target_table; -- return the first boundary in intersections EXECUTE format( 'SELECT the_geom FROM observatory.%I WHERE ST_Intersects($1, the_geom) LIMIT 1', target_table) INTO boundary USING geom; RETURN boundary; END; $$ LANGUAGE plpgsql; -- OBS_GetBoundaryId -- -- retrieves the boundary identifier (e.g., '36047' = Kings County/Brooklyn, NY) -- corresponding to the location geom and boundary types (e.g., -- us.census.tiger.county) -- Inputs: -- geom geometry: location where the boundary is requested to overlap with -- boundary_id text: source id of boundaries (e.g., us.census.tiger.county) -- see function OBS_ListGeomColumns for all avaiable -- boundary ids -- time_span text: time span that the geometries were collected (optional) -- -- Output: -- geometry_id text: identifier of the geometry which overlaps with the input -- point geom in the table corresponding to boundary_id and -- time_span -- CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetBoundaryId( geom geometry(Point, 4326), boundary_id text, time_span text DEFAULT NULL ) RETURNS text AS $$ DECLARE output_id text; target_table text; geoid_colname text; BEGIN -- If not point, raise error IF ST_GeometryType(geom) != 'ST_Point' THEN RAISE EXCEPTION 'Invalid geometry type (%), expecting ''ST_Point''', ST_GeometryType(geom); END IF; -- choose appropriate table based on time_span IF time_span IS NULL THEN SELECT x.target_tables INTO target_table FROM cdb_observatory._OBS_SearchTables(boundary_id, time_span) As x(target_tables, timespans) ORDER BY x.timespans DESC LIMIT 1; ELSE SELECT x.target_tables INTO target_table FROM cdb_observatory._OBS_SearchTables(boundary_id, time_span) As x(target_tables, timespans) WHERE x.timespans = time_span LIMIT 1; END IF; -- if no tables are found, raise notice and return null IF target_table IS NULL THEN --RAISE NOTICE 'Warning: No boundaries found for ''%''', boundary_id; RETURN NULL::text; END IF; EXECUTE format('SELECT ct.colname FROM observatory.obs_column_to_column c2c, observatory.obs_column_table ct, observatory.obs_table t WHERE c2c.reltype = ''geom_ref'' AND ct.column_id = c2c.source_id AND ct.table_id = t.id AND t.tablename = %L' , target_table) INTO geoid_colname; --RAISE NOTICE 'target_table: %, geoid_colname: %', target_table, geoid_colname; -- return geometry id column value EXECUTE format( 'SELECT %I::text FROM observatory.%I WHERE ST_Intersects($1, the_geom) LIMIT 1', geoid_colname, target_table) INTO output_id USING geom; RETURN output_id; END; $$ LANGUAGE plpgsql; -- OBS_GetBoundaryById -- -- Given a geometry reference (e.g., geoid for US Census), and it's geometry -- level (see OBS_ListGeomColumns() for all available boundary ids), give back -- the boundary that corresponds to that geometry_id, boundary_id, and -- time_span -- Inputs: -- geometry_id text: geometry id of the requested boundary -- boundary_id text: source id of boundaries (e.g., us.census.tiger.county) -- see function OBS_ListGeomColumns for all avaiable -- boundary ids -- time_span text: time span that the geometries were collected (optional) -- -- Output: -- boundary geometry: geometry boundary that matches geometry_id, is at the -- resolution requested with boundary_id, and time_span -- CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetBoundaryById( geometry_id text, -- ex: '36047' boundary_id text, -- ex: 'us.census.tiger.county' time_span text DEFAULT NULL -- ex: '2009' ) RETURNS geometry(geometry, 4326) AS $$ DECLARE boundary geometry(geometry, 4326); target_table text; geoid_colname text; geom_colname text; BEGIN SELECT * INTO geoid_colname, target_table, geom_colname FROM cdb_observatory._OBS_GetGeometryMetadata(boundary_id); --RAISE NOTICE '%', target_table; IF target_table IS NULL THEN --RAISE NOTICE 'No geometries found'; RETURN NULL::geometry; END IF; -- retrieve boundary EXECUTE format( 'SELECT %I FROM observatory.%I WHERE %I = $1 LIMIT 1', geom_colname, target_table, geoid_colname) INTO boundary USING geometry_id; RETURN boundary; END; $$ LANGUAGE plpgsql; -- _OBS_GetBoundariesByGeometry -- internal function for retrieving geometries based on an input geometry -- see OBS_GetBoundariesByGeometry or OBS_GetBoundariesByPointAndRadius for -- more information CREATE OR REPLACE FUNCTION cdb_observatory._OBS_GetBoundariesByGeometry( geom geometry(Geometry, 4326), boundary_id text, time_span text DEFAULT NULL, overlap_type text DEFAULT 'intersects') RETURNS TABLE(the_geom geometry, geom_refs text) AS $$ DECLARE boundary geometry(Geometry, 4326); geom_colname text; geoid_colname text; target_table text; BEGIN -- check inputs IF lower(overlap_type) NOT IN ('contains', 'intersects', 'within') THEN -- recognized overlap type (map to ST_Contains, ST_Intersects, and ST_Within) RAISE EXCEPTION 'Overlap type ''%'' is not an accepted type (choose intersects, within, or contains)', overlap_type; ELSIF ST_GeometryType(geom) NOT IN ('ST_Polygon', 'ST_MultiPolygon') THEN RAISE EXCEPTION 'Invalid geometry type (%), expecting ''ST_MultiPolygon'' or ''ST_Polygon''', ST_GeometryType(geom); END IF; -- TODO: add timespan in search -- TODO: add overlap info in search SELECT * INTO geoid_colname, target_table, geom_colname FROM cdb_observatory._OBS_GetGeometryMetadata(boundary_id); -- if no tables are found, raise notice and return null IF target_table IS NULL THEN --RAISE NOTICE 'No boundaries found for bounding box ''%'' in ''%''', ST_AsText(geom), boundary_id; RETURN QUERY SELECT NULL::geometry, NULL::text; RETURN; END IF; --RAISE NOTICE 'target_table: %', target_table; -- return first boundary in intersections RETURN QUERY EXECUTE format( 'SELECT %I, %I::text FROM observatory.%I WHERE ST_%s($1, the_geom) ', geom_colname, geoid_colname, target_table, overlap_type) USING geom; RETURN; END; $$ LANGUAGE plpgsql; -- OBS_GetBoundariesByGeometry -- -- Given a bounding box (or a polygon), and it's geometry level (see -- OBS_ListGeomColumns() for all available boundary ids), give back the -- boundaries that are contained within the bounding box polygon and the -- associated geometry ids -- Inputs: -- geom geometry: bounding box (or polygon) of the region of interest -- boundary_id text: source id of boundaries (e.g., us.census.tiger.county) -- see function OBS_ListGeomColumns for all avaiable -- boundary ids -- time_span text: time span that the geometries were collected (optional) -- -- Output: -- table with the following columns -- boundary geometry: geometry boundary that is contained within the input -- bounding box at the requested geometry level -- with boundary_id, and time_span -- geom_refs text: geometry identifiers (e.g., geoid for the US Census) -- CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetBoundariesByGeometry( geom geometry(Geometry, 4326), boundary_id text, time_span text DEFAULT NULL, overlap_type text DEFAULT 'intersects') RETURNS TABLE(the_geom geometry, geom_refs text) AS $$ BEGIN RETURN QUERY SELECT * FROM cdb_observatory._OBS_GetBoundariesByGeometry( geom, boundary_id, time_span, overlap_type ); RETURN; END; $$ LANGUAGE plpgsql; -- OBS_GetBoundariesByPointAndRadius -- -- Given a point and radius, and it's geometry level (see -- OBS_ListGeomColumns() for all available boundary ids), give back the -- boundaries that are contained within the point buffered by radius meters and -- the associated geometry ids -- Inputs: -- geom geometry: point geometry centered on area of interest -- radius numeric: radius (in meters) of a circle centered on geom for -- selecting polygons -- boundary_id text: source id of boundaries (e.g., us.census.tiger.county) -- see function OBS_ListGeomColumns for all avaiable -- boundary ids -- time_span text: time span that the geometries were collected (optional) -- -- Output: -- table with the following columns -- boundary geometry: geometry boundary that is contained within the input -- bounding box at the requested geometry level -- with boundary_id, and time_span -- geom_refs text: geometry identifiers (e.g., geoid for the US Census) -- -- TODO: move to ST_DWithin instead of buffer + intersects? CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetBoundariesByPointAndRadius( geom geometry(Point, 4326), -- point radius numeric, -- radius in meters boundary_id text, time_span text DEFAULT NULL, overlap_type text DEFAULT 'intersects') RETURNS TABLE(the_geom geometry, geom_refs text) AS $$ DECLARE circle_boundary geometry(Geometry, 4326); BEGIN IF ST_GeometryType(geom) != 'ST_Point' THEN RAISE EXCEPTION 'Input geometry ''%'' is not a point', ST_AsText(geom); ELSE circle_boundary := ST_Buffer(geom::geography, radius)::geometry; END IF; RETURN QUERY SELECT * FROM cdb_observatory._OBS_GetBoundariesByGeometry( circle_boundary, boundary_id, time_span); RETURN; END; $$ LANGUAGE plpgsql; -- _OBS_GetPointsByGeometry CREATE OR REPLACE FUNCTION cdb_observatory._OBS_GetPointsByGeometry( geom geometry(Geometry, 4326), boundary_id text, time_span text DEFAULT NULL, overlap_type text DEFAULT 'intersects') RETURNS TABLE(the_geom geometry, geom_refs text) AS $$ DECLARE boundary geometry(Geometry, 4326); geom_colname text; geoid_colname text; target_table text; BEGIN IF lower(overlap_type) NOT IN ('contains', 'within', 'intersects') THEN RAISE EXCEPTION 'Overlap type ''%'' is not an accepted type (choose intersects, within, or contains)', overlap_type; ELSIF ST_GeometryType(geom) NOT IN ('ST_Polygon', 'ST_MultiPolygon') THEN RAISE EXCEPTION 'Invalid geometry type (%), expecting ''ST_MultiPolygon'' or ''ST_Polygon''', ST_GeometryType(geom); END IF; SELECT * INTO geoid_colname, target_table, geom_colname FROM cdb_observatory._OBS_GetGeometryMetadata(boundary_id); -- if no tables are found, raise notice and return null IF target_table IS NULL THEN --RAISE NOTICE 'No boundaries found for bounding box ''%'' in ''%''', ST_AsText(geom), boundary_id; RETURN QUERY SELECT NULL::geometry, NULL::text; RETURN; END IF; --RAISE NOTICE 'target_table: %', target_table; -- return first boundary in intersections RETURN QUERY EXECUTE format( 'SELECT ST_PointOnSurface(%I) As %s, %I::text FROM observatory.%I WHERE ST_%s($1, the_geom) ', geom_colname, geom_colname, geoid_colname, target_table, overlap_type) USING geom; RETURN; END; $$ LANGUAGE plpgsql; -- OBS_GetPointsByGeometry -- -- Given a polygon, and it's geometry level (see -- OBS_ListGeomColumns() for all available boundary ids), give back a point -- which lies in a boundary from the requested geometry level that is contained -- within the bounding box polygon and the associated geometry ids -- -- Inputs: -- geom geometry: bounding box (or polygon) of the region of interest -- boundary_id text: source id of boundaries (e.g., us.census.tiger.county) -- see function OBS_ListGeomColumns for all avaiable -- boundary ids -- time_span text: time span that the geometries were collected (optional) -- -- Output: -- table with the following columns -- boundary geometry: point that lies on a boundary that is contained within -- the input bounding box at the requested geometry -- level with boundary_id, and time_span -- geom_refs text: geometry identifiers (e.g., geoid for the US Census) -- CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetPointsByGeometry( geom geometry(Geometry, 4326), boundary_id text, time_span text DEFAULT NULL, overlap_type text DEFAULT 'intersects') RETURNS TABLE(the_geom geometry, geom_refs text) AS $$ BEGIN RETURN QUERY SELECT * FROM cdb_observatory._OBS_GetPointsByGeometry( geom, boundary_id, time_span, overlap_type); RETURN; END; $$ LANGUAGE plpgsql; -- OBS_GetBoundariesByPointAndRadius -- -- Given a point and radius, and it's geometry level (see -- OBS_ListGeomColumns() for all available boundary ids), give back the -- boundaries that are contained within the point buffered by radius meters and -- the associated geometry ids -- Inputs: -- geom geometry: point geometry centered on area of interest -- radius numeric: radius (in meters) of a circle centered on geom for -- selecting polygons -- boundary_id text: source id of boundaries (e.g., us.census.tiger.county) -- see function OBS_ListGeomColumns for all avaiable -- boundary ids -- time_span text: time span that the geometries were collected (optional) -- -- Output: -- table with the following columns -- boundary geometry: geometry boundary that is contained within the input -- bounding box at the requested geometry level -- with boundary_id, and time_span -- geom_refs text: geometry identifiers (e.g., geoid for the US Census) -- CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetPointsByPointAndRadius( geom geometry(Point, 4326), -- point radius numeric, -- radius in meters boundary_id text, time_span text DEFAULT NULL, overlap_type text DEFAULT 'intersects') RETURNS TABLE(the_geom geometry, geom_refs text) AS $$ DECLARE circle_boundary geometry(Geometry, 4326); BEGIN IF ST_GeometryType(geom) != 'ST_Point' THEN RAISE EXCEPTION 'Input geometry ''%'' is not a point', ST_AsText(geom); ELSE circle_boundary := ST_Buffer(geom::geography, radius)::geometry; END IF; RETURN QUERY SELECT * FROM cdb_observatory._OBS_GetPointsByGeometry( ST_Buffer(geom::geography, radius)::geometry, boundary_id, time_span, overlap_type); RETURN; END; $$ LANGUAGE plpgsql; -- _OBS_GetGeometryMetadata() -- TODO: add timespan in search -- TODO: add choice of clipped versus not clipped CREATE OR REPLACE FUNCTION cdb_observatory._OBS_GetGeometryMetadata(boundary_id text) RETURNS table(geoid_colname text, target_table text, geom_colname text) AS $$ BEGIN RETURN QUERY EXECUTE format($string$ SELECT geoid_ct.colname::text As geoid_colname, tablename::text, geom_ct.colname::text As geom_colname FROM observatory.obs_column_table As geoid_ct, observatory.obs_table As geom_t, observatory.obs_column_table As geom_ct, observatory.obs_column As geom_c WHERE geoid_ct.column_id IN ( SELECT source_id FROM observatory.obs_column_to_column WHERE reltype = 'geom_ref' AND target_id = '%s' ) AND geoid_ct.table_id = geom_t.id AND geom_t.id = geom_ct.table_id AND geom_ct.column_id = geom_c.id AND geom_c.type ILIKE 'geometry' AND geom_c.id = '%s' $string$, boundary_id, boundary_id); RETURN; -- AND geom_t.timespan = '%s' <-- put in requested year -- TODO: filter by clipped vs. not so appropriate tablename are unique -- so the limit 1 can be removed RETURN; END; $$ LANGUAGE plpgsql; CREATE TYPE cdb_observatory.ds_fdw_metadata as (schemaname text, tabname text, servername text); CREATE TYPE cdb_observatory.ds_return_metadata as (colnames text[], coltypes text[]); CREATE OR REPLACE FUNCTION cdb_observatory._OBS_ConnectUserTable(username text, orgname text, user_db_role text, input_schema text, dbname text, host_addr text, table_name text) RETURNS cdb_observatory.ds_fdw_metadata AS $$ DECLARE fdw_server text; fdw_import_schema text; connection_str json; import_foreign_schema_q text; epoch_timestamp text; BEGIN SELECT extract(epoch from now() at time zone 'utc')::int INTO epoch_timestamp; fdw_server := 'fdw_server_' || username || '_' || epoch_timestamp; fdw_import_schema:= fdw_server; -- Import foreign table EXECUTE FORMAT ('SELECT cdb_observatory._OBS_ConnectRemoteTable(%L, %L, %L, %L, %L, %L, %L)', fdw_server, fdw_import_schema, dbname, host_addr, user_db_role, table_name, input_schema); RETURN (fdw_import_schema::text, table_name::text, fdw_server::text); EXCEPTION WHEN others THEN -- Disconnect user imported table. Delete schema and FDW server. EXECUTE 'DROP FOREIGN TABLE IF EXISTS ' || fdw_import_schema || '.' || table_name; EXECUTE 'DROP SCHEMA IF EXISTS ' || fdw_import_schema || ' CASCADE'; EXECUTE 'DROP SERVER IF EXISTS ' || fdw_server || ' CASCADE;'; RETURN (null, null, null); END; $$ LANGUAGE plpgsql SECURITY DEFINER; CREATE OR REPLACE FUNCTION cdb_observatory._OBS_GetReturnMetadata(username text, orgname text, function_name text, params json) RETURNS cdb_observatory.ds_return_metadata AS $$ DECLARE colnames text[]; coltypes text[]; requested_measures text[]; measure text; BEGIN -- Simple mock, there should be real logic in here. IF $3 NOT ILIKE 'GetMeasure' OR $3 IS NULL THEN RAISE 'This function is not supported yet: %', $3; END IF; SELECT translate($4::json->>'tag_name','[]', '{}')::text[] INTO requested_measures; FOREACH measure IN ARRAY requested_measures LOOP IF NOT measure ILIKE ANY (Array['total_pop', 'pop_16_over']::text[]) THEN RAISE 'This measure is not supported yet: %', measure; END IF; SELECT array_append(colnames, measure) INTO colnames; SELECT array_append(coltypes, 'double precision'::text) INTO coltypes; END LOOP; RETURN (colnames::text[], coltypes::text[]); END; $$ LANGUAGE plpgsql; CREATE OR REPLACE FUNCTION cdb_observatory._OBS_FetchJoinFdwTableData(username text, orgname text, table_schema text, table_name text, function_name text, params json) RETURNS SETOF record AS $$ DECLARE data_query text; tag_name text[]; tag text; tags_list text; tags_query text; rec RECORD; BEGIN SELECT translate($6::json->>'tag_name','[]', '{}')::text[] INTO tag_name; SELECT array_to_string(tag_name, ',') INTO tags_list; tags_query := ''; FOREACH tag IN ARRAY tag_name LOOP SELECT tags_query || ' sum(' || tag || '/fraction)::double precision as ' || tag || ', ' INTO tags_query; END LOOP; -- Simple mock, there should be real logic in here. data_query := '(WITH _areas AS(SELECT ST_Area(a.the_geom::geography)' || '/ (1000 * 1000) as fraction, a.geoid, b.cartodb_id FROM ' || 'observatory.obs_c6fb99c47d61289fbb8e561ff7773799d3fcc308 as a, ' || table_schema || '.' || table_name || ' AS b ' || 'WHERE b.the_geom && a.the_geom ), values AS (SELECT geoid, ' || tags_list || ' FROM observatory.obs_1a098da56badf5f32e336002b0a81708c40d29cd ) ' || 'SELECT ' || tags_query || ' cartodb_id::int FROM _areas, values ' || 'WHERE values.geoid = _areas.geoid GROUP BY cartodb_id);'; FOR rec IN EXECUTE data_query LOOP RETURN NEXT rec; END LOOP; RETURN; END; $$ LANGUAGE plpgsql SECURITY DEFINER; CREATE OR REPLACE FUNCTION cdb_observatory._OBS_DisconnectUserTable(username text, orgname text, table_schema text, table_name text, servername text) RETURNS boolean AS $$ BEGIN EXECUTE 'DROP FOREIGN TABLE IF EXISTS "' || table_schema || '".' || table_name; EXECUTE 'DROP SCHEMA IF EXISTS ' || table_schema || ' CASCADE'; EXECUTE 'DROP SERVER IF EXISTS ' || servername || ' CASCADE;'; RETURN true; END; $$ LANGUAGE plpgsql SECURITY DEFINER; -- Placeholder for permission tweaks at creation time. -- Make sure by default there are no permissions for publicuser -- NOTE: this happens at extension creation time, as part of an implicit transaction. -- REVOKE ALL PRIVILEGES ON SCHEMA cdb_observatory FROM PUBLIC, publicuser CASCADE; -- Grant permissions on the schema to publicuser (but just the schema) -- GRANT USAGE ON SCHEMA cdb_crankshaft TO publicuser; -- Revoke execute permissions on all functions in the schema by default -- REVOKE EXECUTE ON ALL FUNCTIONS IN SCHEMA cdb_observatory FROM PUBLIC, publicuser;