diff --git a/release/observatory--1.0.0.sql b/release/observatory--1.0.0.sql new file mode 100644 index 0000000..9011620 --- /dev/null +++ b/release/observatory--1.0.0.sql @@ -0,0 +1,1816 @@ +--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES +-- Complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION observatory" to load this file. \quit +-- Version number of the extension release +CREATE OR REPLACE FUNCTION cdb_observatory_version() +RETURNS text AS $$ + SELECT '1.0.0'::text; +$$ language 'sql' STABLE STRICT; + +-- Internal identifier of the installed extension instence +-- e.g. 'dev' for current development version +CREATE OR REPLACE FUNCTION _cdb_observatory_internal_version() +RETURNS text AS $$ + SELECT installed_version FROM pg_available_extensions where name='observatory' and pg_available_extensions IS NOT NULL; +$$ language 'sql' STABLE STRICT; + +-- Returns the table name with geoms for the given geometry_id +-- TODO probably needs to take in the column_id array to get the relevant +-- table where there is multiple sources for a column from multiple +-- geometries. +CREATE OR REPLACE FUNCTION cdb_observatory._OBS_GeomTable( + geom geometry(Geometry, 4326), + geometry_id text, + time_span text DEFAULT NULL +) + RETURNS TEXT +AS $$ +DECLARE + result text; +BEGIN + EXECUTE ' + SELECT tablename FROM observatory.OBS_table + WHERE id IN ( + SELECT table_id + FROM observatory.OBS_table tab, + observatory.OBS_column_table coltable, + observatory.OBS_column col + WHERE type ILIKE ''geometry'' + AND coltable.column_id = col.id + AND coltable.table_id = tab.id + AND col.id = $1 + AND CASE WHEN $3::TEXT IS NOT NULL THEN timespan ILIKE $3::TEXT ELSE TRUE END + ORDER BY timespan DESC LIMIT 1 + ) + ' + USING geometry_id, geom, time_span + INTO result; + + return result; + +END; +$$ LANGUAGE plpgsql; + + + +-- A function that gets the column data for multiple columns +-- Old: OBS_GetColumnData +CREATE OR REPLACE FUNCTION cdb_observatory._OBS_GetColumnData( + geometry_id text, + column_ids text[], + timespan text +) +RETURNS SETOF JSON +AS $$ +BEGIN + + -- figure out highest-weight geometry_id/timespan pair for the first data column + -- TODO this should be done for each data column separately + IF geometry_id IS NULL OR timespan IS NULL THEN + EXECUTE ' + SELECT data_t.timespan timespan, geom_c.id boundary_id + FROM observatory.obs_table data_t, + observatory.obs_column_table data_ct, + observatory.obs_column data_c, + observatory.obs_column_table geoid_ct, + observatory.obs_column_to_column c2c, + observatory.obs_column geom_c + WHERE data_c.id = $2 + AND data_ct.column_id = data_c.id + AND data_ct.table_id = data_t.id + AND geoid_ct.table_id = data_t.id + AND geoid_ct.column_id = c2c.source_id + AND c2c.reltype = ''geom_ref'' + AND geom_c.id = c2c.target_id + AND CASE WHEN $3 IS NULL THEN True ELSE $3 = timespan END + AND CASE WHEN $1 IS NULL THEN True ELSE $1 = geom_c.id END + ORDER BY geom_c.weight DESC, + data_t.timespan DESC + LIMIT 1 + ' INTO timespan, geometry_id + USING geometry_id, (column_ids)[1], timespan; + END IF; + + RETURN QUERY + EXECUTE ' + WITH geomref AS ( + SELECT ct.table_id id + FROM observatory.OBS_column_to_column c2c, + observatory.OBS_column_table ct + WHERE c2c.reltype = ''geom_ref'' + AND c2c.target_id = $1 + AND c2c.source_id = ct.column_id + ), + column_ids as ( + select row_number() over () as no, a.column_id as column_id from (select unnest($2) as column_id) a + ) + SELECT row_to_json(a) from ( + select colname, + tablename, + aggregate, + name, + type, + c.description, + $1 AS boundary_id + FROM column_ids, observatory.OBS_column c, observatory.OBS_column_table ct, observatory.OBS_table t + WHERE column_ids.column_id = c.id + AND c.id = ct.column_id + AND t.id = ct.table_id + AND t.timespan = $3 + AND t.id in (SELECT id FROM geomref) + order by column_ids.no + ) a + ' + USING geometry_id, column_ids, timespan + RETURN; + +END; +$$ LANGUAGE plpgsql; + +--Test point cause Stuart always seems to make random points in the water +CREATE OR REPLACE FUNCTION cdb_observatory._TestPoint() + RETURNS geometry(Point, 4326) +AS $$ +BEGIN + -- new york city + RETURN ST_SetSRID(ST_Point( -73.936669, 40.704512), 4326); +END; +$$ LANGUAGE plpgsql; + +--Test polygon cause Stuart always seems to make random points in the water +-- TODO: remove as it's not used anywhere? +CREATE OR REPLACE FUNCTION cdb_observatory._TestArea() + RETURNS geometry(Geometry, 4326) +AS $$ +BEGIN + -- Buffer NYC point by 500 meters + RETURN ST_Buffer(cdb_observatory._TestPoint()::geography, 500)::geometry; + +END; +$$ LANGUAGE plpgsql; + +--Used to expand a column based response to a table based one. Give it the desired +--columns and it will return a partial query for rolling them out to a table. +CREATE OR REPLACE FUNCTION cdb_observatory._OBS_BuildSnapshotQuery(names text[]) +RETURNS TEXT +AS $$ +DECLARE + q text; + i numeric; +BEGIN + + q := 'SELECT '; + + FOR i IN 1..array_upper(names,1) + LOOP + q = q || format(' vals[%s] As %I', i, names[i]); + IF i < array_upper(names, 1) THEN + q= q || ','; + END IF; + END LOOP; + RETURN q; + +END; +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION cdb_observatory._OBS_GetRelatedColumn(columns_ids text[], reltype text ) +RETURNS TEXT[] +AS $$ +DECLARE + result TEXT[]; +BEGIN + EXECUTE ' + With ids as ( + select row_number() over() as no, id from (select unnest($1) as id) t + ) + select array_agg(target_id order by no) + FROM ids + LEFT JOIN observatory.obs_column_to_column + on source_id = id + where reltype = $2 or reltype is null + ' + INTO result + using columns_ids, reltype; + return result; +END; +$$ LANGUAGE plpgsql; + +-- Function that replaces all non digits or letters with _ trims and lowercases the +-- passed measure name + +CREATE OR REPLACE FUNCTION cdb_observatory._OBS_StandardizeMeasureName(measure_name text) +RETURNS text +AS $$ +DECLARE + result text; +BEGIN + -- Turn non letter or digits to _ + result = regexp_replace(measure_name, '[^\dA-Za-z]+','_', 'g'); + -- Remove duplicate _'s + result = regexp_replace(result,'_{2,}','_', 'g'); + -- Trim _'s from beginning and end + result = trim(both '_' from result); + result = lower(result); + RETURN result; +END; +$$ LANGUAGE plpgsql; + +-- Function that returns the currently deployed obs_dump_version from the +-- remote table of the same name. + +CREATE OR REPLACE FUNCTION cdb_observatory.OBS_DumpVersion( +) + RETURNS TEXT +AS $$ +DECLARE + result text; +BEGIN + EXECUTE ' + SELECT MAX(dump_id) FROM observatory.obs_dump_version + ' INTO result; + RETURN result; +END; +$$ LANGUAGE plpgsql; + +--For Longer term Dev + + +--Break out table definitions to types +--Automate type creation from a script, something like +----CREATE OR REPLACE FUNCTION OBS_Get<%=tag_name%>(geom GEOMETRY) +----RETURNS TABLE( +----<%=get_dimensions_for_tag(tag_name)%> +----AS $$ +----DECLARE +----target_cols text[]; +----names text[]; +----vals NUMERIC[];- +----q text; +----BEGIN +----target_cols := Array[<%=get_dimensions_for_tag(tag_name)%>], + + +--Functions for augmenting specific tables +-------------------------------------------------------------------------------- + +-- Creates a table of demographic snapshot + +CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetDemographicSnapshot(geom geometry(Geometry, 4326), + time_span text DEFAULT NULL, + boundary_id text DEFAULT NULL) +RETURNS SETOF JSON +AS $$ + DECLARE + target_cols text[]; + BEGIN + + IF time_span IS NULL THEN + time_span = '2010 - 2014'; + END IF; + + IF boundary_id IS NULL THEN + boundary_id = 'us.census.tiger.block_group'; + END IF; + + target_cols := Array['us.census.acs.B01003001', + 'us.census.acs.B01001002', + 'us.census.acs.B01001026', + 'us.census.acs.B01002001', + 'us.census.acs.B03002003', + 'us.census.acs.B03002004', + 'us.census.acs.B03002006', + 'us.census.acs.B03002012', + 'us.census.acs.B03002005', + 'us.census.acs.B03002008', + 'us.census.acs.B03002009', + 'us.census.acs.B03002002', + --'not_us_citizen_pop', + --'workers_16_and_over', + --'commuters_by_car_truck_van', + --'commuters_drove_alone', + --'commuters_by_carpool', + --'commuters_by_public_transportation', + --'commuters_by_bus', + --'commuters_by_subway_or_elevated', + --'walked_to_work', + --'worked_at_home', + --'children', + 'us.census.acs.B11001001', + --'population_3_years_over', + --'in_school', + --'in_grades_1_to_4', + --'in_grades_5_to_8', + --'in_grades_9_to_12', + --'in_undergrad_college', + 'us.census.acs.B15003001', + 'us.census.acs.B15003017', + 'us.census.acs.B15003019', + 'us.census.acs.B15003020', + 'us.census.acs.B15003021', + 'us.census.acs.B15003022', + 'us.census.acs.B15003023', + --'pop_5_years_over', + --'speak_only_english_at_home', + --'speak_spanish_at_home', + --'pop_determined_poverty_status', + --'poverty', + 'us.census.acs.B19013001', + 'us.census.acs.B19083001', + 'us.census.acs.B19301001', + 'us.census.acs.B25001001', + 'us.census.acs.B25002003', + 'us.census.acs.B25004002', + 'us.census.acs.B25004004', + 'us.census.acs.B25058001', + 'us.census.acs.B25071001', + 'us.census.acs.B25075001', + 'us.census.acs.B25075025', + 'us.census.acs.B25081002', + --'pop_15_and_over', + --'pop_never_married', + --'pop_now_married', + --'pop_separated', + --'pop_widowed', + --'pop_divorced', + 'us.census.acs.B08134001', + 'us.census.acs.B08134002', + 'us.census.acs.B08134003', + 'us.census.acs.B08134004', + 'us.census.acs.B08134005', + 'us.census.acs.B08134006', + 'us.census.acs.B08134007', + 'us.census.acs.B08134008', + 'us.census.acs.B08134009', + 'us.census.acs.B08134010', + 'us.census.acs.B08135001', + 'us.census.acs.B19001002', + 'us.census.acs.B19001003', + 'us.census.acs.B19001004', + 'us.census.acs.B19001005', + 'us.census.acs.B19001006', + 'us.census.acs.B19001007', + 'us.census.acs.B19001008', + 'us.census.acs.B19001009', + 'us.census.acs.B19001010', + 'us.census.acs.B19001011', + 'us.census.acs.B19001012', + 'us.census.acs.B19001013', + 'us.census.acs.B19001014', + 'us.census.acs.B19001015', + 'us.census.acs.B19001016', + 'us.census.acs.B19001017']; + RETURN QUERY + EXECUTE + 'select * from cdb_observatory._OBS_Get($1, $2, $3, $4 )' + USING geom, target_cols, time_span, boundary_id + RETURN; +END; +$$ LANGUAGE plpgsql; + + +--Base functions for performing augmentation +---------------------------------------------------------------------------------------- + +-- Base augmentation fucntion. +CREATE OR REPLACE FUNCTION cdb_observatory._OBS_Get( + geom geometry(Geometry, 4326), + column_ids text[], + time_span text, + geometry_level text +) +RETURNS SETOF JSON +AS $$ +DECLARE + results json[]; + geom_table_name text; + names text[]; + query text; + data_table_info json[]; +BEGIN + + EXECUTE + 'SELECT array_agg(_obs_getcolumndata) + FROM cdb_observatory._OBS_GetColumnData($1, $2, $3);' + INTO data_table_info + USING geometry_level, column_ids, time_span; + + IF geometry_level IS NULL THEN + geometry_level = data_table_info[1]->>'boundary_id'; + END IF; + + geom_table_name := cdb_observatory._OBS_GeomTable(geom, geometry_level); + + IF geom_table_name IS NULL + THEN + RAISE NOTICE 'Point % is outside of the data region', ST_AsText(geom); + -- TODO this should return JSON + RETURN QUERY SELECT '{}'::json; + RETURN; + END IF; + + IF data_table_info IS NULL THEN + RAISE NOTICE 'Cannot find data table for boundary ID %, column_ids %, and time_span %', geometry_level, column_ids, time_span; + END IF; + + IF ST_GeometryType(geom) = 'ST_Point' + THEN + RAISE NOTICE 'geom_table_name %, data_table_info %', geom_table_name, data_table_info::json[]; + results := cdb_observatory._OBS_GetPoints(geom, + geom_table_name, + data_table_info); + + ELSIF ST_GeometryType(geom) IN ('ST_Polygon', 'ST_MultiPolygon') + THEN + results := cdb_observatory._OBS_GetPolygons(geom, + geom_table_name, + data_table_info); + END IF; + + RETURN QUERY + EXECUTE + $query$ + SELECT unnest($1) + $query$ + USING results; + RETURN; + +END; +$$ LANGUAGE plpgsql; + + +-- If the variable of interest is just a rate return it as such, +-- otherwise normalize it to the census block area and return that +CREATE OR REPLACE FUNCTION cdb_observatory._OBS_GetPoints( + geom geometry(Geometry, 4326), + geom_table_name text, -- TODO: change to boundary_id + data_table_info json[] +) +RETURNS json[] +AS $$ +DECLARE + result NUMERIC[]; + json_result json[]; + query text; + i int; + geoid text; + data_geoid_colname text; + geom_geoid_colname text; + area NUMERIC; +BEGIN + + -- TODO we're assuming our geom_table has only one geom_ref column + -- we *really* should pass in both geom_table_name and boundary_id + -- TODO tablename should not be passed here (use boundary_id) + EXECUTE + format('SELECT ct.colname + FROM observatory.obs_column_to_column c2c, + observatory.obs_column_table ct, + observatory.obs_table t + WHERE c2c.reltype = ''geom_ref'' + AND ct.column_id = c2c.source_id + AND ct.table_id = t.id + AND t.tablename = %L' + , (data_table_info)[1]->>'tablename') + INTO data_geoid_colname; + EXECUTE + format('SELECT ct.colname + FROM observatory.obs_column_to_column c2c, + observatory.obs_column_table ct, + observatory.obs_table t + WHERE c2c.reltype = ''geom_ref'' + AND ct.column_id = c2c.source_id + AND ct.table_id = t.id + AND t.tablename = %L' + , geom_table_name) + INTO geom_geoid_colname; + + EXECUTE + format('SELECT %I + FROM observatory.%I + WHERE ST_Within($1, the_geom)', + geom_geoid_colname, + geom_table_name) + USING geom + INTO geoid; + + RAISE NOTICE 'geoid is %, geometry table is % ', geoid, geom_table_name; + + EXECUTE + format('SELECT ST_Area(the_geom::geography) / (1000 * 1000) + FROM observatory.%I + WHERE %I = %L', + geom_table_name, + geom_geoid_colname, + geoid) + INTO area; + + IF area IS NULL + THEN + RAISE NOTICE 'No geometry at %', ST_AsText(geom); + END IF; + + query := 'SELECT Array['; + FOR i IN 1..array_upper(data_table_info, 1) + LOOP + IF area is NULL OR area = 0 + THEN + -- give back null values + query := query || format('NULL::numeric '); + ELSIF ((data_table_info)[i])->>'aggregate' != 'sum' + THEN + -- give back full variable + query := query || format('%I ', ((data_table_info)[i])->>'colname'); + ELSE + -- give back variable normalized by area of geography + query := query || format('%I/%s ', + ((data_table_info)[i])->>'colname', + area); + END IF; + + IF i < array_upper(data_table_info, 1) + THEN + query := query || ','; + END IF; + END LOOP; + + query := query || format(' ]::numeric[] + FROM observatory.%I + WHERE %I.%I = %L + ', + ((data_table_info)[1])->>'tablename', + ((data_table_info)[1])->>'tablename', + data_geoid_colname, + geoid + ); + + EXECUTE + query + INTO result + USING geom; + + EXECUTE + $query$ + SELECT array_agg(row_to_json(t)) FROM ( + SELECT values As value, + meta->>'name' As name, + meta->>'tablename' As tablename, + meta->>'aggregate' As aggregate, + meta->>'type' As type, + meta->>'description' As description + FROM (SELECT unnest($1) As values, unnest($2) As meta) b + ) t + $query$ + INTO json_result + USING result, data_table_info; + + RETURN json_result; +END; +$$ LANGUAGE plpgsql; + + +CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetMeasure( + geom geometry(Geometry, 4326), + measure_id TEXT, + normalize TEXT DEFAULT 'area', -- TODO none/null + boundary_id TEXT DEFAULT NULL, + time_span TEXT DEFAULT NULL +) +RETURNS NUMERIC +AS $$ +DECLARE + result NUMERIC; + measure_ids TEXT[]; + denominator_id TEXT; + vals NUMERIC[]; +BEGIN + + IF normalize ILIKE 'area' THEN + measure_ids := ARRAY[measure_id]; + ELSIF normalize ILIKE 'denominator' THEN + EXECUTE 'SELECT (cdb_observatory._OBS_GetRelatedColumn(ARRAY[$1], ''denominator''))[1] + ' INTO denominator_id + USING measure_id; + measure_ids := ARRAY[measure_id, denominator_id]; + ELSIF normalize ILIKE 'none' THEN + -- TODO we need a switch on obs_get to disable area normalization + RAISE EXCEPTION 'No normalization not yet supported.'; + ELSE + RAISE EXCEPTION 'Only valid inputs for "normalize" are "area" (default) and "denominator".'; + END IF; + + EXECUTE ' + SELECT ARRAY_AGG(val) FROM (SELECT (cdb_observatory._OBS_Get($1, $2, $3, $4)->>''value'')::NUMERIC val) b + ' + INTO vals + USING geom, measure_ids, time_span, boundary_id; + + IF normalize ILIKE 'denominator' THEN + RETURN (vals)[1]/(vals)[2]; + ELSE + RETURN (vals)[1]; + END IF; +END; +$$ LANGUAGE plpgsql; + + +CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetMeasureById( + geom_ref TEXT, + measure_id TEXT, + boundary_id TEXT, + time_span TEXT DEFAULT NULL +) +RETURNS NUMERIC +AS $$ +DECLARE + target_table TEXT; + colname TEXT; + measure_val NUMERIC; + data_geoid_colname TEXT; + test_query TEXT; +BEGIN + + SELECT x ->> 'colname', x ->> 'tablename' INTO colname, target_table + FROM cdb_observatory._OBS_GetColumnData(boundary_id, Array[measure_id], time_span) As x; + + EXECUTE + format('SELECT ct.colname + FROM observatory.obs_column_to_column c2c, + observatory.obs_column_table ct, + observatory.obs_table t + WHERE c2c.reltype = ''geom_ref'' + AND ct.column_id = c2c.source_id + AND ct.table_id = t.id + AND t.tablename = %L' + , target_table) + INTO data_geoid_colname; + + RAISE DEBUG 'target_table %, colname %', target_table, colname; + + EXECUTE format( + 'SELECT %I + FROM observatory.%I + WHERE %I.%I = %L', + colname, + target_table, + target_table, data_geoid_colname, geom_ref) + INTO measure_val; + + RETURN measure_val; + +END; +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetCategory( + geom geometry(Geometry, 4326), + category_id TEXT, + boundary_id TEXT DEFAULT NULL, + time_span TEXT DEFAULT NULL +) +RETURNS TEXT +AS $$ +DECLARE + denominator_id TEXT; + categories TEXT[]; +BEGIN + + IF boundary_id IS NULL THEN + -- TODO we should determine best boundary for this geom + boundary_id := 'us.census.tiger.census_tract'; + END IF; + + IF time_span IS NULL THEN + -- TODO we should determine latest timespan for this measure + time_span := '2010 - 2014'; + END IF; + + EXECUTE ' + SELECT ARRAY_AGG(val) FROM (SELECT (cdb_observatory._OBS_GetCategories($1, $2, $3, $4))->>''category'' val LIMIT 1) b + ' + INTO categories + USING geom, ARRAY[category_id], boundary_id, time_span; + + RETURN (categories)[1]; + +END; +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetUSCensusMeasure( + geom geometry(Geometry, 4326), + name TEXT, + normalize TEXT DEFAULT 'area', + boundary_id TEXT DEFAULT NULL, + time_span TEXT DEFAULT NULL + ) +RETURNS NUMERIC AS $$ +DECLARE + standardized_name text; + measure_id text; + result NUMERIC; +BEGIN + standardized_name = cdb_observatory._OBS_StandardizeMeasureName(name); + + EXECUTE $string$ + SELECT c.id + FROM observatory.obs_column c + JOIN observatory.obs_column_tag ct + ON c.id = ct.column_id + WHERE cdb_observatory._OBS_StandardizeMeasureName(c.name) = $1 + AND ct.tag_id ILIKE 'us.census%' + $string$ + INTO measure_id + USING standardized_name; + + EXECUTE 'SELECT cdb_observatory.OBS_GetMeasure($1, $2, $3, $4, $5)' + INTO result + USING geom, measure_id, normalize, boundary_id, time_span; + RETURN result; +END; +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetUSCensusCategory( + geom geometry(Geometry, 4326), + name TEXT, + boundary_id TEXT DEFAULT NULL, + time_span TEXT DEFAULT NULL + ) +RETURNS TEXT AS $$ +DECLARE + standardized_name text; + category_id text; + result TEXT; +BEGIN + standardized_name = cdb_observatory._OBS_StandardizeMeasureName(name); + + EXECUTE $string$ + SELECT c.id + FROM observatory.obs_column c + --JOIN observatory.obs_column_tag ct + -- ON c.id = ct.column_id + WHERE cdb_observatory._OBS_StandardizeMeasureName(c.name) = $1 + AND c.type ILIKE 'TEXT' + AND c.id ILIKE 'us.census%' -- TODO this should be done by tag + --AND ct.tag_id = 'us.census.acs.demographics' + $string$ + INTO category_id + USING standardized_name; + + EXECUTE 'SELECT cdb_observatory.OBS_GetCategory($1, $2, $3, $4)' + INTO result + USING geom, category_id, boundary_id, time_span; + RETURN result; +END; +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetPopulation( + geom geometry(Geometry, 4326), + normalize TEXT DEFAULT 'area', + boundary_id TEXT DEFAULT NULL, + time_span TEXT DEFAULT NULL +) +RETURNS NUMERIC +AS $$ +DECLARE + population_measure_id TEXT; + result NUMERIC; +BEGIN + -- TODO use a super-column for global pop + population_measure_id := 'us.census.acs.B01003001'; + + EXECUTE format('SELECT cdb_observatory.OBS_GetMeasure( + %L, %L, %L, %L, %L + ) LIMIT 1', geom, population_measure_id, normalize, boundary_id, time_span) + INTO result; + + return result; +END; +$$ LANGUAGE plpgsql; + + +CREATE OR REPLACE FUNCTION cdb_observatory._OBS_GetPolygons( + geom geometry(Geometry, 4326), + geom_table_name text, + data_table_info json[] +) +RETURNS json[] +AS $$ +DECLARE + result numeric[]; + json_result json[]; + q_select text; + q_sum text; + q text; + i NUMERIC; + data_geoid_colname text; + geom_geoid_colname text; +BEGIN + + -- TODO we're assuming our geom_table has only one geom_ref column + -- we *really* should pass in both geom_table_name and boundary_id + -- TODO tablename should not be passed here (use boundary_id) + EXECUTE + format('SELECT ct.colname + FROM observatory.obs_column_to_column c2c, + observatory.obs_column_table ct, + observatory.obs_table t + WHERE c2c.reltype = ''geom_ref'' + AND ct.column_id = c2c.source_id + AND ct.table_id = t.id + AND t.tablename = %L' + , (data_table_info)[1]->>'tablename') + INTO data_geoid_colname; + EXECUTE + format('SELECT ct.colname + FROM observatory.obs_column_to_column c2c, + observatory.obs_column_table ct, + observatory.obs_table t + WHERE c2c.reltype = ''geom_ref'' + AND ct.column_id = c2c.source_id + AND ct.table_id = t.id + AND t.tablename = %L' + , geom_table_name) + INTO geom_geoid_colname; + + q_select := format('SELECT %I, ', data_geoid_colname); + q_sum := 'SELECT Array['; + + FOR i IN 1..array_upper(data_table_info, 1) + LOOP + q_select := q_select || format( '%I ', ((data_table_info)[i])->>'colname'); + + IF ((data_table_info)[i])->>'aggregate' ='sum' + THEN + q_sum := q_sum || format('sum(overlap_fraction * COALESCE(%I, 0)) ',((data_table_info)[i])->>'colname',((data_table_info)[i])->>'colname'); + ELSE + q_sum := q_sum || ' NULL::numeric '; + END IF; + + IF i < array_upper(data_table_info,1) + THEN + q_select := q_select || format(','); + q_sum := q_sum || format(','); + END IF; + END LOOP; + + q := format(' + WITH _overlaps As ( + SELECT ST_Area( + ST_Intersection($1, a.the_geom) + ) / ST_Area(a.the_geom) As overlap_fraction, + %I + FROM observatory.%I As a + WHERE $1 && a.the_geom + ), + values As ( + ', geom_geoid_colname, geom_table_name); + + q := q || q_select || format('FROM observatory.%I ', ((data_table_info)[1]->>'tablename')); + + q := format(q || ' ) ' || q_sum || ' ]::numeric[] FROM _overlaps, values + WHERE values.%I = _overlaps.%I', data_geoid_colname, geom_geoid_colname); + + EXECUTE + q + INTO result + USING geom; + + EXECUTE + $query$ + SELECT array_agg(row_to_json(t)) FROM ( + SELECT values As value, + meta->>'name' As name, + meta->>'tablename' As tablename, + meta->>'aggregate' As aggregate, + meta->>'type' As type, + meta->>'description' As description + FROM (SELECT unnest($1) As values, unnest($2) As meta) b + ) t + $query$ + INTO json_result + USING result, data_table_info; + + RETURN json_result; +END; +$$ LANGUAGE plpgsql; + + + +CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetSegmentSnapshot( + geom geometry(Geometry, 4326), + boundary_id text DEFAULT NULL +) +RETURNS JSON +AS $$ +DECLARE + target_cols text[]; + result json; + seg_name Text; + geom_id Text; + q Text; + segment_names Text[]; +BEGIN +IF boundary_id IS NULL THEN + boundary_id = 'us.census.tiger.census_tract'; +END IF; +target_cols := Array[ + 'us.census.acs.B01003001_quantile', + 'us.census.acs.B01001002_quantile', + 'us.census.acs.B01001026_quantile', + 'us.census.acs.B01002001_quantile', + 'us.census.acs.B03002003_quantile', + 'us.census.acs.B03002004_quantile', + 'us.census.acs.B03002006_quantile', + 'us.census.acs.B03002012_quantile', + 'us.census.acs.B05001006_quantile',-- + 'us.census.acs.B08006001_quantile',-- + 'us.census.acs.B08006002_quantile',-- + 'us.census.acs.B08006008_quantile',-- + 'us.census.acs.B08006009_quantile',-- + 'us.census.acs.B08006011_quantile',-- + 'us.census.acs.B08006015_quantile',-- + 'us.census.acs.B08006017_quantile',-- + 'us.census.acs.B09001001_quantile',-- + 'us.census.acs.B11001001_quantile', + 'us.census.acs.B14001001_quantile',-- + 'us.census.acs.B14001002_quantile',-- + 'us.census.acs.B14001005_quantile',-- + 'us.census.acs.B14001006_quantile',-- + 'us.census.acs.B14001007_quantile',-- + 'us.census.acs.B14001008_quantile',-- + 'us.census.acs.B15003001_quantile', + 'us.census.acs.B15003017_quantile', + 'us.census.acs.B15003022_quantile', + 'us.census.acs.B15003023_quantile', + 'us.census.acs.B16001001_quantile',-- + 'us.census.acs.B16001002_quantile',-- + 'us.census.acs.B16001003_quantile',-- + 'us.census.acs.B17001001_quantile',-- + 'us.census.acs.B17001002_quantile',-- + 'us.census.acs.B19013001_quantile', + 'us.census.acs.B19083001_quantile', + 'us.census.acs.B19301001_quantile', + 'us.census.acs.B25001001_quantile', + 'us.census.acs.B25002003_quantile', + 'us.census.acs.B25004002_quantile', + 'us.census.acs.B25004004_quantile', + 'us.census.acs.B25058001_quantile', + 'us.census.acs.B25071001_quantile', + 'us.census.acs.B25075001_quantile', + 'us.census.acs.B25075025_quantile' + ]; + + EXECUTE + $query$ + SELECT array_agg(_OBS_GetCategories->>'category') + FROM cdb_observatory._OBS_GetCategories( + $1, + Array['us.census.spielman_singleton_segments.X10', 'us.census.spielman_singleton_segments.X55'], + $2) + $query$ + INTO segment_names + USING geom, boundary_id; + + q := + format($query$ + WITH a As ( + SELECT + array_agg(_OBS_GET->>'name') As names, + array_agg(_OBS_GET->>'value') As vals + FROM cdb_observatory._OBS_Get($1, + $2, + '2010 - 2014', + $3) + + ), percentiles As ( + %s + FROM a) + SELECT row_to_json(r) FROM + ( SELECT $4 as x10_segment, $5 as x55_segment, percentiles.* + FROM percentiles) r + $query$, cdb_observatory._OBS_BuildSnapshotQuery(target_cols)) results; + + + EXECUTE + q + into result + USING geom, target_cols, boundary_id, segment_names[1], segment_names[2]; + + return result; + +END; +$$ LANGUAGE plpgsql; + +--Get categorical variables from point + +CREATE OR REPLACE FUNCTION cdb_observatory._OBS_GetCategories( + geom geometry(Geometry, 4326), + dimension_names text[], + boundary_id text DEFAULT NULL, + time_span text DEFAULT NULL +) +RETURNS SETOF JSON as $$ +DECLARE + geom_table_name text; + geoid text; + names text[]; + results text[]; + query text; + data_table_info json[]; +BEGIN + + IF time_span IS NULL THEN + time_span = '2010 - 2014'; + END IF; + + IF boundary_id IS NULL THEN + boundary_id = 'us.census.tiger.block_group'; + END IF; + + geom_table_name := cdb_observatory._OBS_GeomTable(geom, boundary_id); + + IF geom_table_name IS NULL + THEN + RAISE NOTICE 'Point % is outside of the data region', ST_AsText(geom); + RETURN QUERY SELECT '{}'::text[], '{}'::text[]; + RETURN; + END IF; + + EXECUTE ' + SELECT array_agg(_obs_getcolumndata) + FROM cdb_observatory._OBS_GetColumnData($1, $2, $3); + ' + INTO data_table_info + USING boundary_id, dimension_names, time_span; + + IF data_table_info IS NULL + THEN + RAISE NOTICE 'No data table found for this location'; + RETURN QUERY SELECT NULL::json; + RETURN; + END IF; + + EXECUTE + format('SELECT geoid + FROM observatory.%I + WHERE the_geom && $1', + geom_table_name) + USING geom + INTO geoid; + + IF geoid IS NULL + THEN + RAISE NOTICE 'No geometry id for this location'; + RETURN QUERY SELECT NULL::json; + RETURN; + END IF; + + query := 'SELECT ARRAY['; + + FOR i IN 1..array_upper(data_table_info, 1) + LOOP + query = query || format('%I ', lower(((data_table_info)[i])->>'colname')); + IF i < array_upper(data_table_info, 1) + THEN + query := query || ','; + END IF; + END LOOP; + + query := query || format(' ]::text[] + FROM observatory.%I + WHERE %I.geoid = %L + ', + ((data_table_info)[1])->>'tablename', + ((data_table_info)[1])->>'tablename', + geoid + ); + + EXECUTE + query + INTO results + USING geom; + + RETURN QUERY + EXECUTE + $query$ + SELECT row_to_json(t) FROM ( + SELECT categories As category, + meta->>'name' As name, + meta->>'tablename' As tablename, + meta->>'aggregate' As aggregate, + meta->>'type' As type, + meta->>'description' As description + FROM (SELECT unnest($1) As categories, + unnest($2) As meta) As b + ) t + $query$ + USING results, data_table_info; + RETURN; + +END; +$$ LANGUAGE plpgsql; +-- return a table that contains a string match based on input +-- TODO: implement search for timespan + +CREATE OR REPLACE FUNCTION cdb_observatory._OBS_SearchTables( + search_term text, + time_span text DEFAULT NULL +) +RETURNS table(tablename text, timespan text) +As $$ +DECLARE + out_var text[]; +BEGIN + + IF time_span IS NULL + THEN + RETURN QUERY + EXECUTE + 'SELECT tablename::text, timespan::text + FROM observatory.obs_table t + JOIN observatory.obs_column_table ct + ON ct.table_id = t.id + JOIN observatory.obs_column c + ON ct.column_id = c.id + WHERE c.type ILIKE ''geometry'' + AND c.id = $1' + USING search_term; + RETURN; + ELSE + RETURN QUERY + EXECUTE + 'SELECT tablename::text, timespan::text + FROM observatory.obs_table t + JOIN observatory.obs_column_table ct + ON ct.table_id = t.id + JOIN observatory.obs_column c + ON ct.column_id = c.id + WHERE c.type ILIKE ''geometry'' + AND c.id = $1 + AND t.timespan = $2' + USING search_term, time_span; + RETURN; + END IF; + +END; +$$ LANGUAGE plpgsql IMMUTABLE; + +-- Functions used to search the observatory for measures +-------------------------------------------------------------------------------- +-- TODO allow the user to specify the boundary to search for measures +-- + +CREATE OR REPLACE FUNCTION cdb_observatory.OBS_Search( + search_term text, + relevant_boundary text DEFAULT null +) +RETURNS TABLE(id text, description text, name text, aggregate text, source text) as $$ +DECLARE + boundary_term text; +BEGIN + IF relevant_boundary then + boundary_term = ''; + else + boundary_term = ''; + END IF; + + RETURN QUERY + EXECUTE format($string$ + SELECT id::text, description::text, + name::text, + aggregate::text, + NULL::TEXT source -- TODO use tags + FROM observatory.OBS_column + where name ilike '%%' || %L || '%%' + or description ilike '%%' || %L || '%%' + %s + $string$, search_term, search_term,boundary_term); + RETURN; +END +$$ LANGUAGE plpgsql; + + +-- Functions to return the geometry levels that a point is part of +-------------------------------------------------------------------------------- +-- TODO add test response + +CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetAvailableBoundaries( + geom geometry(Geometry, 4326), + timespan text DEFAULT null) +RETURNS TABLE(boundary_id text, description text, time_span text, tablename text) as $$ +DECLARE + timespan_query TEXT DEFAULT ''; +BEGIN + + IF timespan != NULL + THEN + timespan_query = format('AND timespan = %L', timespan); + END IF; + + RETURN QUERY + EXECUTE + $string$ + SELECT + column_id::text As column_id, + obs_column.description::text As description, + timespan::text As timespan, + tablename::text As tablename + FROM + observatory.OBS_table, + observatory.OBS_column_table, + observatory.OBS_column + WHERE + observatory.OBS_column_table.column_id = observatory.obs_column.id AND + observatory.OBS_column_table.table_id = observatory.obs_table.id + AND + observatory.OBS_column.type = 'Geometry' + AND + ST_Intersects($1, observatory.obs_table.the_geom) + $string$ || timespan_query + USING geom; + RETURN; +END +$$ LANGUAGE plpgsql; +-- Data Observatory -- Welcome to the Future +-- These Data Observatory functions provide access to boundary polyons (and +-- their ids) such as those available through the US Census Tiger, Who's on +-- First, the Spanish Census, and so on + + +-- OBS_GetBoundary +-- +-- Returns the boundary polygon(s) that overlap with the input point geometry. +-- From an input point geometry, find the boundary which intersects with the +-- centroid of the input geometry +-- Inputs: +-- geom geometry: input point geometry +-- boundary_id text: source id of boundaries +-- see function OBS_ListGeomColumns for all avaiable +-- boundary ids +-- time_span text: time span that the geometries were collected (optional) +-- +-- Output: +-- boundary geometry: geometry boundary that intersects with geom, is at the +-- resolution requested with boundary_id, and time_span +-- + +CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetBoundary( + geom geometry(Point, 4326), + boundary_id text, + time_span text DEFAULT NULL) +RETURNS geometry(Geometry, 4326) +AS $$ +DECLARE + boundary geometry(Geometry, 4326); + target_table text; +BEGIN + + -- TODO: Check if SRID = 4326, if not transform? + + -- if not a point, raise error + IF ST_GeometryType(geom) != 'ST_Point' + THEN + RAISE EXCEPTION 'Invalid geometry type (%), expecting ''ST_Point''', ST_GeometryType(geom); + END IF; + + -- choose appropriate table based on time_span + IF time_span IS NULL + THEN + SELECT x.target_tables INTO target_table + FROM cdb_observatory._OBS_SearchTables(boundary_id, + time_span) As x(target_tables, + timespans) + ORDER BY x.timespans DESC + LIMIT 1; + ELSE + -- TODO: modify for only one table returned instead of arbitrarily choosing + -- one with LIMIT 1 (could be conflict between clipped vs non-clipped + -- boundaries in the metadata tables) + SELECT x.target_tables INTO target_table + FROM cdb_observatory._OBS_SearchTables(boundary_id, + time_span) As x(target_tables, + timespans) + WHERE x.timespans = time_span + LIMIT 1; + END IF; + + -- if no tables are found, raise notice and return null + IF target_table IS NULL + THEN + RAISE NOTICE 'No boundaries found for ''%'' in ''%''', ST_AsText(geom), boundary_id; + RETURN NULL::geometry; + END IF; + + RAISE NOTICE 'target_table: %', target_table; + + -- return the first boundary in intersections + EXECUTE format( + 'SELECT the_geom + FROM observatory.%I + WHERE ST_Intersects($1, the_geom) + LIMIT 1', target_table) + INTO boundary + USING geom; + + RETURN boundary; + +END; +$$ LANGUAGE plpgsql; + +-- OBS_GetBoundaryId +-- +-- retrieves the boundary identifier (e.g., '36047' = Kings County/Brooklyn, NY) +-- corresponding to the location geom and boundary types (e.g., +-- us.census.tiger.county) + +-- Inputs: +-- geom geometry: location where the boundary is requested to overlap with +-- boundary_id text: source id of boundaries (e.g., us.census.tiger.county) +-- see function OBS_ListGeomColumns for all avaiable +-- boundary ids +-- time_span text: time span that the geometries were collected (optional) +-- +-- Output: +-- geometry_id text: identifier of the geometry which overlaps with the input +-- point geom in the table corresponding to boundary_id and +-- time_span +-- + +CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetBoundaryId( + geom geometry(Point, 4326), + boundary_id text, + time_span text DEFAULT NULL +) +RETURNS text +AS $$ +DECLARE + output_id text; + target_table text; + geoid_colname text; +BEGIN + + -- If not point, raise error + IF ST_GeometryType(geom) != 'ST_Point' + THEN + RAISE EXCEPTION 'Invalid geometry type (%), expecting ''ST_Point''', ST_GeometryType(geom); + END IF; + + -- choose appropriate table based on time_span + IF time_span IS NULL + THEN + SELECT x.target_tables INTO target_table + FROM cdb_observatory._OBS_SearchTables(boundary_id, + time_span) As x(target_tables, + timespans) + ORDER BY x.timespans DESC + LIMIT 1; + ELSE + SELECT x.target_tables INTO target_table + FROM cdb_observatory._OBS_SearchTables(boundary_id, + time_span) As x(target_tables, + timespans) + WHERE x.timespans = time_span + LIMIT 1; + END IF; + + -- if no tables are found, raise notice and return null + IF target_table IS NULL + THEN + RAISE NOTICE 'Warning: No boundaries found for ''%''', boundary_id; + RETURN NULL::text; + END IF; + + EXECUTE + format('SELECT ct.colname + FROM observatory.obs_column_to_column c2c, + observatory.obs_column_table ct, + observatory.obs_table t + WHERE c2c.reltype = ''geom_ref'' + AND ct.column_id = c2c.source_id + AND ct.table_id = t.id + AND t.tablename = %L' + , target_table) + INTO geoid_colname; + + RAISE NOTICE 'target_table: %, geoid_colname: %', target_table, geoid_colname; + + -- return geometry id column value + EXECUTE format( + 'SELECT %I::text + FROM observatory.%I + WHERE ST_Intersects($1, the_geom) + LIMIT 1', geoid_colname, target_table) + INTO output_id + USING geom; + + RETURN output_id; + +END; +$$ LANGUAGE plpgsql; + + +-- OBS_GetBoundaryById +-- +-- Given a geometry reference (e.g., geoid for US Census), and it's geometry +-- level (see OBS_ListGeomColumns() for all available boundary ids), give back +-- the boundary that corresponds to that geometry_id, boundary_id, and +-- time_span + +-- Inputs: +-- geometry_id text: geometry id of the requested boundary +-- boundary_id text: source id of boundaries (e.g., us.census.tiger.county) +-- see function OBS_ListGeomColumns for all avaiable +-- boundary ids +-- time_span text: time span that the geometries were collected (optional) +-- +-- Output: +-- boundary geometry: geometry boundary that matches geometry_id, is at the +-- resolution requested with boundary_id, and time_span +-- + +CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetBoundaryById( + geometry_id text, -- ex: '36047' + boundary_id text, -- ex: 'us.census.tiger.county' + time_span text DEFAULT NULL -- ex: '2009' +) +RETURNS geometry(geometry, 4326) +AS $$ +DECLARE + boundary geometry(geometry, 4326); + target_table text; + geoid_colname text; + geom_colname text; +BEGIN + + SELECT * INTO geoid_colname, target_table, geom_colname + FROM cdb_observatory._OBS_GetGeometryMetadata(boundary_id); + + RAISE NOTICE '%', target_table; + + IF target_table IS NULL + THEN + RAISE NOTICE 'No geometries found'; + RETURN NULL::geometry; + END IF; + + -- retrieve boundary + EXECUTE + format( + 'SELECT %I + FROM observatory.%I + WHERE %I = $1 + LIMIT 1', geom_colname, target_table, geoid_colname) + INTO boundary + USING geometry_id; + + RETURN boundary; + +END; +$$ LANGUAGE plpgsql; + +-- _OBS_GetBoundariesByGeometry +-- internal function for retrieving geometries based on an input geometry +-- see OBS_GetBoundariesByGeometry or OBS_GetBoundariesByPointAndRadius for +-- more information + +CREATE OR REPLACE FUNCTION cdb_observatory._OBS_GetBoundariesByGeometry( + geom geometry(Geometry, 4326), + boundary_id text, + time_span text DEFAULT NULL, + overlap_type text DEFAULT 'intersects') +RETURNS TABLE(the_geom geometry, geom_refs text) +AS $$ +DECLARE + boundary geometry(Geometry, 4326); + geom_colname text; + geoid_colname text; + target_table text; +BEGIN + + -- check inputs + IF lower(overlap_type) NOT IN ('contains', 'intersects', 'within') + THEN + -- recognized overlap type (map to ST_Contains, ST_Intersects, and ST_Within) + RAISE EXCEPTION 'Overlap type ''%'' is not an accepted type (choose intersects, within, or contains)', overlap_type; + ELSIF ST_GeometryType(geom) NOT IN ('ST_Polygon', 'ST_MultiPolygon') + THEN + RAISE EXCEPTION 'Invalid geometry type (%), expecting ''ST_MultiPolygon'' or ''ST_Polygon''', ST_GeometryType(geom); + END IF; + + -- TODO: add timespan in search + -- TODO: add overlap info in search + SELECT * INTO geoid_colname, target_table, geom_colname + FROM cdb_observatory._OBS_GetGeometryMetadata(boundary_id); + + -- if no tables are found, raise notice and return null + IF target_table IS NULL + THEN + RAISE NOTICE 'No boundaries found for bounding box ''%'' in ''%''', ST_AsText(geom), boundary_id; + RETURN QUERY SELECT NULL::geometry, NULL::text; + RETURN; + END IF; + + RAISE NOTICE 'target_table: %', target_table; + + -- return first boundary in intersections + RETURN QUERY + EXECUTE format( + 'SELECT %I, %I::text + FROM observatory.%I + WHERE ST_%s($1, the_geom) + ', geom_colname, geoid_colname, target_table, overlap_type) + USING geom; + RETURN; + +END; +$$ LANGUAGE plpgsql; + +-- OBS_GetBoundariesByGeometry +-- +-- Given a bounding box (or a polygon), and it's geometry level (see +-- OBS_ListGeomColumns() for all available boundary ids), give back the +-- boundaries that are contained within the bounding box polygon and the +-- associated geometry ids + +-- Inputs: +-- geom geometry: bounding box (or polygon) of the region of interest +-- boundary_id text: source id of boundaries (e.g., us.census.tiger.county) +-- see function OBS_ListGeomColumns for all avaiable +-- boundary ids +-- time_span text: time span that the geometries were collected (optional) +-- +-- Output: +-- table with the following columns +-- boundary geometry: geometry boundary that is contained within the input +-- bounding box at the requested geometry level +-- with boundary_id, and time_span +-- geom_refs text: geometry identifiers (e.g., geoid for the US Census) +-- + +CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetBoundariesByGeometry( + geom geometry(Geometry, 4326), + boundary_id text, + time_span text DEFAULT NULL, + overlap_type text DEFAULT 'intersects') +RETURNS TABLE(the_geom geometry, geom_refs text) +AS $$ +BEGIN + + RETURN QUERY SELECT * + FROM cdb_observatory._OBS_GetBoundariesByGeometry( + geom, + boundary_id, + time_span, + overlap_type + ); + RETURN; + +END; +$$ LANGUAGE plpgsql; + +-- OBS_GetBoundariesByPointAndRadius +-- +-- Given a point and radius, and it's geometry level (see +-- OBS_ListGeomColumns() for all available boundary ids), give back the +-- boundaries that are contained within the point buffered by radius meters and +-- the associated geometry ids + +-- Inputs: +-- geom geometry: point geometry centered on area of interest +-- radius numeric: radius (in meters) of a circle centered on geom for +-- selecting polygons +-- boundary_id text: source id of boundaries (e.g., us.census.tiger.county) +-- see function OBS_ListGeomColumns for all avaiable +-- boundary ids +-- time_span text: time span that the geometries were collected (optional) +-- +-- Output: +-- table with the following columns +-- boundary geometry: geometry boundary that is contained within the input +-- bounding box at the requested geometry level +-- with boundary_id, and time_span +-- geom_refs text: geometry identifiers (e.g., geoid for the US Census) +-- +-- TODO: move to ST_DWithin instead of buffer + intersects? +CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetBoundariesByPointAndRadius( + geom geometry(Point, 4326), -- point + radius numeric, -- radius in meters + boundary_id text, + time_span text DEFAULT NULL, + overlap_type text DEFAULT 'intersects') +RETURNS TABLE(the_geom geometry, geom_refs text) +AS $$ +DECLARE + circle_boundary geometry(Geometry, 4326); +BEGIN + + IF ST_GeometryType(geom) != 'ST_Point' + THEN + RAISE EXCEPTION 'Input geometry ''%'' is not a point', ST_AsText(geom); + ELSE + circle_boundary := ST_Buffer(geom::geography, radius)::geometry; + END IF; + + RETURN QUERY SELECT * + FROM cdb_observatory._OBS_GetBoundariesByGeometry( + circle_boundary, + boundary_id, + time_span); + RETURN; +END; +$$ LANGUAGE plpgsql; + +-- _OBS_GetPointsByGeometry + + +CREATE OR REPLACE FUNCTION cdb_observatory._OBS_GetPointsByGeometry( + geom geometry(Geometry, 4326), + boundary_id text, + time_span text DEFAULT NULL, + overlap_type text DEFAULT 'intersects') +RETURNS TABLE(the_geom geometry, geom_refs text) +AS $$ +DECLARE + boundary geometry(Geometry, 4326); + geom_colname text; + geoid_colname text; + target_table text; +BEGIN + + IF lower(overlap_type) NOT IN ('contains', 'within', 'intersects') + THEN + RAISE EXCEPTION 'Overlap type ''%'' is not an accepted type (choose intersects, within, or contains)', overlap_type; + ELSIF ST_GeometryType(geom) NOT IN ('ST_Polygon', 'ST_MultiPolygon') + THEN + RAISE EXCEPTION 'Invalid geometry type (%), expecting ''ST_MultiPolygon'' or ''ST_Polygon''', ST_GeometryType(geom); + END IF; + + SELECT * INTO geoid_colname, target_table, geom_colname + FROM cdb_observatory._OBS_GetGeometryMetadata(boundary_id); + + -- if no tables are found, raise notice and return null + IF target_table IS NULL + THEN + RAISE NOTICE 'No boundaries found for bounding box ''%'' in ''%''', ST_AsText(geom), boundary_id; + RETURN QUERY SELECT NULL::geometry, NULL::text; + RETURN; + END IF; + + RAISE NOTICE 'target_table: %', target_table; + + -- return first boundary in intersections + RETURN QUERY + EXECUTE format( + 'SELECT ST_PointOnSurface(%I) As %s, %I::text + FROM observatory.%I + WHERE ST_%s($1, the_geom) + ', geom_colname, geom_colname, geoid_colname, target_table, overlap_type) + USING geom; + RETURN; + +END; +$$ LANGUAGE plpgsql; + +-- OBS_GetPointsByGeometry +-- +-- Given a polygon, and it's geometry level (see +-- OBS_ListGeomColumns() for all available boundary ids), give back a point +-- which lies in a boundary from the requested geometry level that is contained +-- within the bounding box polygon and the associated geometry ids +-- +-- Inputs: +-- geom geometry: bounding box (or polygon) of the region of interest +-- boundary_id text: source id of boundaries (e.g., us.census.tiger.county) +-- see function OBS_ListGeomColumns for all avaiable +-- boundary ids +-- time_span text: time span that the geometries were collected (optional) +-- +-- Output: +-- table with the following columns +-- boundary geometry: point that lies on a boundary that is contained within +-- the input bounding box at the requested geometry +-- level with boundary_id, and time_span +-- geom_refs text: geometry identifiers (e.g., geoid for the US Census) +-- + +CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetPointsByGeometry( + geom geometry(Geometry, 4326), + boundary_id text, + time_span text DEFAULT NULL, + overlap_type text DEFAULT 'intersects') +RETURNS TABLE(the_geom geometry, geom_refs text) +AS $$ +BEGIN + + RETURN QUERY SELECT * + FROM cdb_observatory._OBS_GetPointsByGeometry( + geom, + boundary_id, + time_span, + overlap_type); + RETURN; + +END; +$$ LANGUAGE plpgsql; + +-- OBS_GetBoundariesByPointAndRadius +-- +-- Given a point and radius, and it's geometry level (see +-- OBS_ListGeomColumns() for all available boundary ids), give back the +-- boundaries that are contained within the point buffered by radius meters and +-- the associated geometry ids + +-- Inputs: +-- geom geometry: point geometry centered on area of interest +-- radius numeric: radius (in meters) of a circle centered on geom for +-- selecting polygons +-- boundary_id text: source id of boundaries (e.g., us.census.tiger.county) +-- see function OBS_ListGeomColumns for all avaiable +-- boundary ids +-- time_span text: time span that the geometries were collected (optional) +-- +-- Output: +-- table with the following columns +-- boundary geometry: geometry boundary that is contained within the input +-- bounding box at the requested geometry level +-- with boundary_id, and time_span +-- geom_refs text: geometry identifiers (e.g., geoid for the US Census) +-- + +CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetPointsByPointAndRadius( + geom geometry(Point, 4326), -- point + radius numeric, -- radius in meters + boundary_id text, + time_span text DEFAULT NULL, + overlap_type text DEFAULT 'intersects') +RETURNS TABLE(the_geom geometry, geom_refs text) +AS $$ +DECLARE + circle_boundary geometry(Geometry, 4326); +BEGIN + + IF ST_GeometryType(geom) != 'ST_Point' + THEN + RAISE EXCEPTION 'Input geometry ''%'' is not a point', ST_AsText(geom); + ELSE + circle_boundary := ST_Buffer(geom::geography, radius)::geometry; + END IF; + + RETURN QUERY SELECT * + FROM cdb_observatory._OBS_GetPointsByGeometry( + ST_Buffer(geom::geography, radius)::geometry, + boundary_id, + time_span, + overlap_type); + RETURN; +END; +$$ LANGUAGE plpgsql; + + +-- _OBS_GetGeometryMetadata() +-- TODO: add timespan in search +-- TODO: add choice of clipped versus not clipped +CREATE OR REPLACE FUNCTION cdb_observatory._OBS_GetGeometryMetadata(boundary_id text) +RETURNS table(geoid_colname text, target_table text, geom_colname text) +AS $$ +BEGIN + + RETURN QUERY + EXECUTE + format($string$ + SELECT geoid_ct.colname::text As geoid_colname, + tablename::text, + geom_ct.colname::text As geom_colname + FROM observatory.obs_column_table As geoid_ct, + observatory.obs_table As geom_t, + observatory.obs_column_table As geom_ct, + observatory.obs_column As geom_c + WHERE geoid_ct.column_id + IN ( + SELECT source_id + FROM observatory.obs_column_to_column + WHERE reltype = 'geom_ref' + AND target_id = '%s' + ) + AND geoid_ct.table_id = geom_t.id AND + geom_t.id = geom_ct.table_id AND + geom_ct.column_id = geom_c.id AND + geom_c.type ILIKE 'geometry' AND + geom_c.id = '%s' + $string$, boundary_id, boundary_id); + RETURN; + -- AND geom_t.timespan = '%s' <-- put in requested year + -- TODO: filter by clipped vs. not so appropriate tablename are unique + -- so the limit 1 can be removed + RETURN; + +END; +$$ LANGUAGE plpgsql; +-- Placeholder for permission tweaks at creation time. +-- Make sure by default there are no permissions for publicuser +-- NOTE: this happens at extension creation time, as part of an implicit transaction. +-- REVOKE ALL PRIVILEGES ON SCHEMA cdb_observatory FROM PUBLIC, publicuser CASCADE; + +-- Grant permissions on the schema to publicuser (but just the schema) +-- GRANT USAGE ON SCHEMA cdb_crankshaft TO publicuser; + +-- Revoke execute permissions on all functions in the schema by default +-- REVOKE EXECUTE ON ALL FUNCTIONS IN SCHEMA cdb_observatory FROM PUBLIC, publicuser; diff --git a/release/observatory.control b/release/observatory.control index 1e13336..840a4be 100644 --- a/release/observatory.control +++ b/release/observatory.control @@ -1,5 +1,5 @@ comment = 'CartoDB Observatory backend extension' -default_version = '0.0.5' +default_version = '1.0.0' requires = 'postgis' superuser = true schema = cdb_observatory