Merge pull request #241 from CartoDB/obs_getmeasure_res_bypass

Obs getmeasure res bypass
This commit is contained in:
john krauss 2016-12-28 14:44:01 -05:00 committed by GitHub
commit 3233cb527e
12 changed files with 6866 additions and 4309 deletions

View File

@ -168,10 +168,11 @@ FIXTURES = [
('us.census.acs.B19001015', 'us.census.tiger.block_group', '2010 - 2014'),
('us.census.acs.B19001016', 'us.census.tiger.block_group', '2010 - 2014'),
('us.census.acs.B19001017', 'us.census.tiger.block_group', '2010 - 2014'),
('us.census.tiger.census_tract', 'us.census.tiger.census_tract', '2015'),
('us.census.tiger.census_tract', 'us.census.tiger.census_tract', '2014'),
('us.census.tiger.block_group', 'us.census.tiger.block_group', '2014'),
('us.census.tiger.zcta5', 'us.census.tiger.zcta5', '2014'),
('us.census.tiger.county', 'us.census.tiger.county', '2014'),
('us.census.tiger.block_group', 'us.census.tiger.block_group', '2015'),
('us.census.tiger.zcta5', 'us.census.tiger.zcta5', '2015'),
('us.census.tiger.county', 'us.census.tiger.county', '2015'),
('us.census.acs.B01001002', 'us.census.tiger.block_group', '2010 - 2014'),
('us.census.acs.B01003001', 'us.census.tiger.census_tract', '2010 - 2014'),
('us.census.acs.B01003001_quantile', 'us.census.tiger.census_tract', '2010 - 2014'),
@ -204,6 +205,7 @@ def dump(cols, tablename, where=''):
' | sed "s:SET search_path.*::" '
' | sed "s:CREATE TABLE :CREATE TABLE observatory.:" '
' | sed "s:ALTER TABLE.*OWNER.*::" '
' | sed "s:SET idle_in_transaction_session_timeout.*::" '
' >> {outfile}'.format(
tablename=tablename,
outfile=OUTFILE_PATH,

View File

@ -335,6 +335,104 @@ END;
$$ LANGUAGE plpgsql;
-- OBS_GetMeasureMetaMulti
--
-- Resolves, for every measure requested in `params`, the table/column
-- metadata needed to read that measure for `geom`.
--
-- Parameters:
--   geom               Geometry (SRID 4326) the measures will be evaluated
--                      against. Points are buffered by 200 (geography units)
--                      before being handed to _OBS_GetGeometryScores.
--   params             JSON array of objects. The keys read from each object
--                      are 'numer_id', 'denom_id', 'geom_id' and 'timespan'.
--                      A missing or empty 'denom_id', 'geom_id' or 'timespan'
--                      matches any value.
--   max_timespan_rank  Keep candidates whose timespan rank (newest first,
--                      per requested measure) is <= this value. NULL means 1.
--   max_score_rank     Keep candidates whose geometry score rank (highest
--                      score first, per requested measure) is <= this value.
--                      NULL means 1.
--   target_geoms       Passed through as the third argument of
--                      _OBS_GetGeometryScores.
--
-- Returns:
--   A JSON array with one metadata object per surviving candidate, ordered by
--   the position of the originating element in `params`; NULL when nothing
--   matches.
--
-- Volatility: STABLE, not IMMUTABLE, because the result depends on the
-- contents of observatory.obs_meta.
CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetMeasureMetaMulti(
  geom geometry(Geometry, 4326),
  params JSON,
  max_timespan_rank INTEGER DEFAULT NULL, -- cutoff for timespan ranks when there's ambiguity
  max_score_rank INTEGER DEFAULT NULL, -- cutoff for geom ranks when there's ambiguity
  target_geoms INTEGER DEFAULT NULL
)
RETURNS JSON
AS $$
DECLARE
  result JSON;
BEGIN
  -- A NULL cutoff means "keep only the best-ranked candidate".
  max_timespan_rank := COALESCE(max_timespan_rank, 1);
  max_score_rank := COALESCE(max_score_rank, 1);

  EXECUTE $string$
    -- One row per element of the params array; id is its 1-based position.
    WITH _filters AS (
      SELECT
        p.id,
        p.spec->>'numer_id' AS numer_id,
        p.spec->>'denom_id' AS denom_id,
        p.spec->>'geom_id' AS geom_id,
        p.spec->>'timespan' AS timespan
      FROM unnest($3) WITH ORDINALITY AS p(spec, id)
    ),
    -- Every obs_meta row compatible with each requested measure.
    meta AS (
      SELECT
        id, geom_tid,
        numer_aggregate, numer_colname, numer_geomref_colname,
        numer_tablename, denom_aggregate, denom_colname, denom_geomref_colname,
        denom_tablename, geom_colname, geom_geomref_colname,
        geom_tablename, numer_name, denom_name, geom_name,
        numer_type, denom_type, geom_type,
        m.denom_id, m.geom_id, m.numer_timespan
      FROM observatory.obs_meta m JOIN _filters f
        ON m.numer_id = f.numer_id
      WHERE
        m.numer_id = ANY ($6)
        AND (m.denom_id = f.denom_id OR COALESCE(f.denom_id, '') = '')
        AND (m.geom_id = f.geom_id OR COALESCE(f.geom_id, '') = '')
        AND (m.numer_timespan = f.timespan OR COALESCE(f.timespan, '') = '')
    ),
    -- Suitability score of each candidate geometry for the input geometry.
    scores AS (
      SELECT *
      FROM cdb_observatory._OBS_GetGeometryScores($1,
        (SELECT Array_Agg(geom_id) FROM meta), $2) scores
    ),
    -- Rank candidates per requested measure and package their metadata.
    groups AS (
      SELECT
        id, scores.score, numer_timespan,
        dense_rank() OVER (PARTITION BY id ORDER BY numer_timespan DESC) timespan_rank,
        dense_rank() OVER (PARTITION BY id ORDER BY score DESC) score_rank,
        json_build_object(
          'timespan_rank', dense_rank() OVER (PARTITION BY id ORDER BY numer_timespan DESC),
          'score_rank', dense_rank() OVER (PARTITION BY id ORDER BY score DESC),
          'score', scores.score,
          'numer_aggregate', meta.numer_aggregate,
          'numer_colname', meta.numer_colname,
          'numer_geomref_colname', meta.numer_geomref_colname,
          'numer_tablename', meta.numer_tablename,
          'numer_type', meta.numer_type,
          'denom_aggregate', meta.denom_aggregate,
          'denom_colname', denom_colname,
          'denom_geomref_colname', denom_geomref_colname,
          'denom_tablename', denom_tablename,
          'denom_type', meta.denom_type,
          'geom_colname', geom_colname,
          'geom_geomref_colname', geom_geomref_colname,
          'geom_tablename', geom_tablename,
          'geom_type', meta.geom_type,
          'timespan', numer_timespan,
          'numer_name', numer_name,
          'denom_name', denom_name,
          'geom_name', geom_name,
          'denom_id', denom_id,
          'geom_id', meta.geom_id
        ) metadata
      FROM meta, scores
      WHERE meta.geom_id = scores.column_id
        AND meta.geom_tid = scores.table_id
    )
    SELECT JSON_AGG(metadata ORDER BY id)
    FROM groups
    WHERE timespan_rank <= $4
      AND score_rank <= $5
  $string$
  INTO result
  USING
    -- $1: scoring geometry; a bare point is widened to a small polygon first.
    CASE WHEN ST_GeometryType(geom) = 'ST_Point' THEN
      ST_Buffer(geom::geography, 200)::geometry(geometry, 4326)
    ELSE geom
    END,
    -- $2
    target_geoms,
    -- $3: params exploded into one json value per array element
    (SELECT ARRAY(SELECT json_array_elements_text(params))::json[]),
    -- $4, $5
    max_timespan_rank,
    max_score_rank,
    -- $6: every requested numer_id
    (SELECT Array_Agg(val) from (select (JSON_Array_Elements(params))->>'numer_id' val) foo)
  ;
  RETURN result;
END;
$$ LANGUAGE plpgsql STABLE;
CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetMeasure(
geom geometry(Geometry, 4326),
measure_id TEXT,
@ -347,21 +445,13 @@ RETURNS NUMERIC
AS $$
DECLARE
geom_type TEXT;
map_type TEXT;
numer_aggregate TEXT;
numer_colname TEXT;
numer_geomref_colname TEXT;
numer_tablename TEXT;
denom_colname TEXT;
denom_geomref_colname TEXT;
denom_tablename TEXT;
geom_colname TEXT;
geom_geomref_colname TEXT;
geom_tablename TEXT;
geom_id TEXT;
params JSON;
result NUMERIC;
sql TEXT;
numer_name TEXT;
denom_name TEXT;
geom_name TEXT;
denom_id TEXT;
map_type TEXT;
BEGIN
IF geom IS NULL THEN
RETURN NULL;
@ -382,160 +472,43 @@ BEGIN
ST_GeometryType(geom);
END IF;
EXECUTE
$query$
WITH meta AS (SELECT numer_aggregate, numer_colname, numer_geomref_colname, numer_tablename,
denom_colname, denom_geomref_colname, denom_tablename,
geom_colname, geom_geomref_colname, geom_tablename,
numer_name, geom_id
FROM observatory.obs_meta
WHERE (geom_id = $1 OR ($1 = ''))
AND numer_id = $2
AND (numer_timespan = $3 OR ($3 = ''))),
scores AS (SELECT *
FROM cdb_observatory._OBS_GetGeometryScores($4,
(SELECT Array_Agg(geom_id) FROM meta), 500))
SELECT meta.*
FROM meta, scores
WHERE meta.geom_id = scores.geom_id
ORDER BY score DESC
LIMIT 1
$query$
INTO numer_aggregate, numer_colname, numer_geomref_colname, numer_tablename,
denom_colname, denom_geomref_colname, denom_tablename,
geom_colname, geom_geomref_colname, geom_tablename, numer_name, geom_id
USING COALESCE(boundary_id, ''), measure_id, COALESCE(time_span, ''),
CASE WHEN ST_GeometryType(geom) = 'ST_Point' THEN
st_buffer(geom::geography, 10)::geometry(geometry, 4326)
ELSE geom
END;
IF geom_id IS NULL THEN
RAISE NOTICE 'No boundary found for geom';
RETURN NULL;
ELSE
RAISE NOTICE 'Using boundary %', geom_id;
END IF;
IF normalize ILIKE 'area' AND numer_aggregate ILIKE 'sum' THEN
IF normalize ILIKE 'area%' THEN --AND numer_aggregate ILIKE 'sum' THEN
map_type := 'areaNormalized';
ELSIF normalize ILIKE 'denominator' THEN
ELSIF normalize ILIKE 'denom%' THEN
map_type := 'denominated';
ELSE
-- defaults: area normalization for point if it's possible and none for
-- polygon or non-summable point
IF geom_type = 'point' AND numer_aggregate ILIKE 'sum' THEN
IF geom_type = 'point' THEN --AND numer_aggregate ILIKE 'sum' THEN
map_type := 'areaNormalized';
ELSE
map_type := 'predenominated';
END IF;
END IF;
params := (SELECT cdb_observatory.OBS_GetMeasureMetaMulti(
geom, JSON_Build_Array(JSON_Build_Object('numer_id', measure_id,
'geom_id', boundary_id,
'timespan', time_span
)), 1, 1, 500));
IF geom_type = 'point' THEN
IF map_type = 'areaNormalized' THEN
sql = format('WITH _geom AS (SELECT ST_Area(geom.%I::Geography) / 1000000 area, geom.%I geom_ref
FROM observatory.%I geom
WHERE ST_Within($1, geom.%I)
LIMIT 1)
SELECT numer.%I / (SELECT area FROM _geom)
FROM observatory.%I numer
WHERE numer.%I = (SELECT geom_ref FROM _geom)',
geom_colname, geom_geomref_colname, geom_tablename,
geom_colname, numer_colname, numer_tablename,
numer_geomref_colname);
ELSIF map_type = 'denominated' THEN
sql = format('SELECT numer.%I / NULLIF((SELECT denom.%I FROM observatory.%I denom WHERE denom.%I = numer.%I LIMIT 1), 0)
FROM observatory.%I numer
WHERE numer.%I =
(SELECT geom.%I
FROM observatory.%I geom
WHERE ST_Within($1, geom.%I) LIMIT 1)',
numer_colname, denom_colname, denom_tablename,
denom_geomref_colname, numer_geomref_colname,
numer_tablename, numer_geomref_colname,
geom_geomref_colname, geom_tablename, geom_colname);
ELSIF map_type = 'predenominated' THEN
sql = format('SELECT numer.%I
FROM observatory.%I numer
WHERE numer.%I =
(SELECT geom.%I
FROM observatory.%I geom
WHERE ST_Within($1, geom.%I) LIMIT 1)',
numer_colname, numer_tablename, numer_geomref_colname,
geom_geomref_colname, geom_tablename, geom_colname);
END IF;
ELSIF geom_type = 'polygon' THEN
IF map_type = 'areaNormalized' THEN
sql = format('WITH _subdivided AS (
SELECT ST_Subdivide($1) AS geom
), _geom AS (SELECT SUM(ST_Area(ST_Intersection(s.geom, geom.%I)))
/ ST_Area(cdb_observatory.FIRST(geom.%I)) overlap, geom.%I geom_ref
FROM observatory.%I geom, _subdivided s
WHERE ST_Intersects(s.geom, geom.%I)
GROUP BY geom.%I)
SELECT SUM(numer.%I * (SELECT _geom.overlap FROM _geom WHERE _geom.geom_ref = numer.%I)) /
(ST_Area($1::Geography) / 1000000)
FROM observatory.%I numer
WHERE numer.%I = ANY ((SELECT ARRAY_AGG(geom_ref) FROM _geom)::TEXT[])',
geom_colname, geom_colname, geom_geomref_colname, geom_tablename,
geom_colname, geom_geomref_colname, numer_colname,
numer_geomref_colname, numer_tablename, numer_geomref_colname);
ELSIF map_type = 'denominated' THEN
sql = format('WITH _subdivided AS (
SELECT ST_Subdivide($1) AS geom
), _geom AS (SELECT SUM(ST_Area(ST_Intersection(s.geom, geom.%I)))
/ ST_Area(cdb_observatory.FIRST(geom.%I)) overlap, geom.%I geom_ref
FROM observatory.%I geom, _subdivided s
WHERE ST_Intersects(s.geom, geom.%I)
GROUP BY geom.%I),
_denom AS (SELECT denom.%I, denom.%I geom_ref
FROM observatory.%I denom
WHERE denom.%I = ANY ((SELECT ARRAY_AGG(geom_ref) FROM _geom)::TEXT[]))
SELECT SUM(numer.%I * (SELECT _geom.overlap FROM _geom WHERE _geom.geom_ref = numer.%I)) /
NullIf(SUM((SELECT _denom.%I * (SELECT _geom.overlap
FROM _geom
WHERE _geom.geom_ref = _denom.geom_ref)
FROM _denom WHERE _denom.geom_ref = numer.%I)), 0)
FROM observatory.%I numer
WHERE numer.%I = ANY ((SELECT ARRAY_AGG(geom_ref) FROM _geom)::TEXT[])',
geom_colname, geom_colname, geom_geomref_colname,
geom_tablename, geom_colname, geom_geomref_colname,
denom_colname, denom_geomref_colname, denom_tablename,
denom_geomref_colname, numer_colname, numer_geomref_colname,
denom_colname, numer_geomref_colname,
numer_tablename, numer_geomref_colname);
ELSIF map_type = 'predenominated' THEN
IF numer_aggregate NOT ILIKE 'sum' THEN
RAISE EXCEPTION 'Cannot calculate "%" (%) for custom area as it cannot be summed, use ST_PointOnSurface instead',
numer_name, measure_id;
ELSE
sql = format('WITH _subdivided AS (
SELECT ST_Subdivide($1) AS geom
), _geom AS (SELECT SUM(ST_Area(ST_Intersection(s.geom, geom.%I)))
/ ST_Area(cdb_observatory.FIRST(geom.%I)) overlap,
geom.%I geom_ref
FROM observatory.%I geom, _subdivided s
WHERE ST_Intersects(s.geom, geom.%I)
GROUP BY geom.%I
)
SELECT SUM(numer.%I * (SELECT _geom.overlap FROM _geom WHERE _geom.geom_ref = numer.%I))
FROM observatory.%I numer
WHERE numer.%I = ANY ((SELECT ARRAY_AGG(geom_ref) FROM _geom)::TEXT[])',
geom_colname, geom_colname, geom_geomref_colname,
geom_tablename, geom_colname, geom_geomref_colname,
numer_colname, numer_geomref_colname, numer_tablename,
numer_geomref_colname);
END IF;
END IF;
params := JSON_Build_Array(JSONB_Set((params::JSONB)->0, '{normalization}', to_jsonb(map_type))::JSON);
IF params->0->>'geom_id' IS NULL THEN
RAISE NOTICE 'No boundary found for geom';
RETURN NULL;
ELSE
RAISE NOTICE 'Using boundary %', params->0->>'geom_id';
END IF;
EXECUTE sql INTO result USING geom;
SELECT measure FROM
cdb_observatory.OBS_GetMeasureDataMulti(ARRAY[(geom, 1)::geomval], params)
AS (id INT, measure NUMERIC)
INTO result;
RETURN result;
END;
$$ LANGUAGE plpgsql;
$$ LANGUAGE plpgsql IMMUTABLE;
CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetMeasureById(
geom_ref TEXT,
@ -584,6 +557,157 @@ BEGIN
END;
$$ LANGUAGE plpgsql;
-- OBS_GetMeasureDataMulti
--
-- Builds and runs a single query that evaluates several measures for several
-- user geometries at once.
--
-- Parameters:
--   geomvals  Array of geomval; `geom` is the user geometry and `val` is the
--             identifier returned (cast to INT) as the first output column.
--   params    JSON array with one object per measure. Keys read from each
--             object: 'numer_aggregate', 'numer_colname',
--             'numer_geomref_colname', 'numer_tablename', 'numer_type',
--             'denom_id', 'denom_colname', 'denom_geomref_colname',
--             'denom_tablename', 'geom_colname', 'geom_geomref_colname',
--             'geom_tablename' and 'normalization'.
--
-- Normalization chosen per measure:
--   * 'denom%', or NULL with a denom_id present -> numerator / denominator
--   * 'area%',  or NULL with a 'sum' aggregate  -> numerator per square km
--   * anything else                             -> value as stored
-- For point input, or a user polygon equal to the Observatory polygon, the
-- value is read directly; otherwise it is interpolated by area of overlap.
--
-- Returns:
--   SETOF RECORD: the id followed by one column per element of `params`, in
--   `params` order, each cast to that measure's 'numer_type'. Callers must
--   supply a column definition list. An empty set is returned when `params`
--   yields no usable measure specification.
CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetMeasureDataMulti(
  geomvals geomval[],
  params JSON
)
RETURNS SETOF RECORD
AS $$
DECLARE
  colspecs TEXT;    -- select-list expressions, one per measure
  tables TEXT;      -- de-duplicated, schema-qualified FROM list
  obs_wheres TEXT;  -- joins between numerator, denominator and geometry tables
  user_wheres TEXT; -- spatial predicates against the user geometries
BEGIN
  EXECUTE
  $query$
    -- One row per measure. Table and column names are identifier-quoted here
    -- so that the string building below can splice them in safely.
    -- numer_type is spliced in verbatim as a cast target and is not quoted.
    WITH _meta AS (
      SELECT
        p.colid,
        p.spec->>'numer_aggregate' AS numer_aggregate,
        p.spec->>'numer_type' AS numer_type,
        p.spec->>'denom_id' AS denom_id,
        p.spec->>'normalization' AS normalization,
        quote_ident(p.spec->>'numer_tablename') AS numer_tablename,
        quote_ident(p.spec->>'numer_colname') AS numer_colname,
        quote_ident(p.spec->>'numer_geomref_colname') AS numer_geomref_colname,
        quote_ident(p.spec->>'denom_tablename') AS denom_tablename,
        quote_ident(p.spec->>'denom_colname') AS denom_colname,
        quote_ident(p.spec->>'denom_geomref_colname') AS denom_geomref_colname,
        quote_ident(p.spec->>'geom_tablename') AS geom_tablename,
        quote_ident(p.spec->>'geom_colname') AS geom_colname,
        quote_ident(p.spec->>'geom_geomref_colname') AS geom_geomref_colname
      FROM unnest($1) WITH ORDINALITY AS p(spec, colid)
    )
    SELECT String_Agg(CASE
      -- denominated
      WHEN LOWER(normalization) LIKE 'denom%' OR (normalization IS NULL AND denom_id IS NOT NULL)
        THEN ' CASE ' ||
          -- denominated point-in-poly or user polygon is same as OBS polygon
          ' WHEN ST_GeometryType(cdb_observatory.FIRST(_geoms.geom)) = ''ST_Point'' ' ||
          ' OR cdb_observatory.FIRST(_geoms.geom = ' || geom_tablename || '.' || geom_colname || ')' ||
          ' THEN cdb_observatory.FIRST(' || numer_tablename || '.' || numer_colname ||
          ' / NullIf(' || denom_tablename || '.' || denom_colname || ', 0))' ||
          -- denominated polygon interpolation
          -- SUM (numer * (% OBS geom in user geom)) / SUM (denom * (% OBS geom in user geom))
          ' ELSE ' ||
          ' SUM(' || numer_tablename || '.' || numer_colname || ' ' ||
          ' * CASE WHEN ST_Within(_geoms.geom, ' || geom_tablename || '.' || geom_colname || ') ' ||
          ' THEN ST_Area(_geoms.geom) / ST_Area(' || geom_tablename || '.' || geom_colname || ') ' ||
          ' WHEN ST_Within(' || geom_tablename || '.' || geom_colname || ', _geoms.geom) ' ||
          ' THEN 1 ' ||
          ' ELSE (ST_Area(ST_Intersection(_geoms.geom, ' || geom_tablename || '.' || geom_colname || ')) ' ||
          ' / ST_Area(' || geom_tablename || '.' || geom_colname || '))' ||
          ' END) / ' ||
          ' NULLIF(SUM(' || denom_tablename || '.' || denom_colname || ' ' ||
          ' * CASE WHEN ST_Within(_geoms.geom, ' || geom_tablename || '.' || geom_colname || ') ' ||
          ' THEN ST_Area(_geoms.geom) / ST_Area(' || geom_tablename || '.' || geom_colname || ') ' ||
          ' WHEN ST_Within(' || geom_tablename || '.' || geom_colname || ', _geoms.geom) ' ||
          ' THEN 1 ' ||
          ' ELSE (ST_Area(ST_Intersection(_geoms.geom, ' || geom_tablename || '.' || geom_colname || ')) ' ||
          ' / ST_Area(' || geom_tablename || '.' || geom_colname || '))' ||
          ' END), 0) ' ||
          ' END '
      -- areaNormalized
      WHEN LOWER(normalization) LIKE 'area%' OR (normalization IS NULL AND numer_aggregate ILIKE 'sum')
        THEN ' CASE ' ||
          -- areaNormalized point-in-poly or user polygon is the same as OBS polygon
          ' WHEN ST_GeometryType(cdb_observatory.FIRST(_geoms.geom)) = ''ST_Point'' ' ||
          ' OR cdb_observatory.FIRST(_geoms.geom = ' || geom_tablename || '.' || geom_colname || ')' ||
          ' THEN cdb_observatory.FIRST(' || numer_tablename || '.' || numer_colname ||
          ' / (ST_Area(' || geom_tablename || '.' || geom_colname || '::Geography)/1000000)) ' ||
          -- areaNormalized polygon interpolation
          -- SUM (numer * (% OBS geom in user geom)) / area of big geom
          ' ELSE ' ||
          ' SUM((' || numer_tablename || '.' || numer_colname || ') ' ||
          ' * CASE WHEN ST_Within(_geoms.geom, ' || geom_tablename || '.' || geom_colname || ') THEN 1 ' ||
          ' WHEN ST_Within(' || geom_tablename || '.' || geom_colname || ', _geoms.geom) THEN ' ||
          ' ST_Area(' || geom_tablename || '.' || geom_colname || ') ' ||
          ' / ST_Area(_geoms.geom)' ||
          ' ELSE (ST_Area(ST_Intersection(_geoms.geom, ' || geom_tablename || '.' || geom_colname || ')) ' ||
          ' / ST_Area(_geoms.geom))' ||
          ' END / (ST_Area(' || geom_tablename || '.' || geom_colname || '::Geography) / 1000000)) END '
      -- prenormalized
      ELSE ' CASE ' ||
          -- predenominated point-in-poly or user polygon is the same as OBS- polygon
          ' WHEN ST_GeometryType(cdb_observatory.FIRST(_geoms.geom)) = ''ST_Point'' ' ||
          ' OR cdb_observatory.FIRST(_geoms.geom = ' || geom_tablename || '.' || geom_colname || ')' ||
          ' THEN cdb_observatory.FIRST(' || numer_tablename || '.' || numer_colname || ') ' ||
          ' ELSE ' ||
          -- predenominated polygon interpolation
          -- TODO should weight by universe instead of area
          -- SUM (numer * (% user geom in OBS geom))
          ' SUM(' || numer_tablename || '.' || numer_colname || ' ' ||
          ' * CASE WHEN ST_Within(_geoms.geom, ' || geom_tablename || '.' || geom_colname || ') ' ||
          ' THEN ST_Area(_geoms.geom) / ST_Area(' || geom_tablename || '.' || geom_colname || ') ' ||
          ' WHEN ST_Within(' || geom_tablename || '.' || geom_colname || ', _geoms.geom) ' ||
          ' THEN 1 ' ||
          ' ELSE (ST_Area(ST_Intersection(_geoms.geom, ' || geom_tablename || '.' || geom_colname || ')) ' ||
          ' / ST_Area(' || geom_tablename || '.' || geom_colname || '))' ||
          ' END) END '
      -- ORDER BY colid keeps output columns in the same order as params.
      END || ':: ' || numer_type || ' AS ' || numer_colname, ', ' ORDER BY colid) AS colspecs,
      -- Each table must appear once in FROM; routing the names through JSONB
      -- object keys removes duplicates.
      (SELECT String_Agg(tablename, ', ') FROM (SELECT JSONB_Object_Keys(JSONB_Object(
        Array_Cat(Array_Agg('observatory.' || numer_tablename),
          Array_Cat(Array_Agg('observatory.' || geom_tablename),
            Array_Agg('observatory.' || denom_tablename) FILTER (WHERE denom_tablename IS NOT NULL))),
        Array_Cat(Array_Agg(numer_tablename),
          Array_Cat(Array_Agg(geom_tablename),
            Array_Agg(denom_tablename) FILTER (WHERE denom_tablename IS NOT NULL)))
      )) tablename) bar) tablenames,
      -- Join numerator (and denominator, when present) to the geometry table.
      String_Agg(numer_tablename || '.' || numer_geomref_colname || ' = ' ||
        geom_tablename || '.' || geom_geomref_colname ||
        Coalesce(' AND ' || numer_tablename || '.' || numer_geomref_colname || ' = ' ||
          denom_tablename || '.' || denom_geomref_colname, ''),
        ' AND ') AS obs_wheres,
      String_Agg('ST_Intersects(' || geom_tablename || '.' || geom_colname
        || ', _geoms.geom)', ' AND ')
        AS user_wheres
    FROM _meta
    ;
  $query$
  INTO colspecs, tables, obs_wheres, user_wheres
  USING (SELECT ARRAY(SELECT json_array_elements_text(params))::json[]);

  -- With NULL/empty params, or specs missing required names, every fragment
  -- is NULL and format() would emit an unparseable query; return no rows.
  IF colspecs IS NULL OR tables IS NULL OR obs_wheres IS NULL OR user_wheres IS NULL THEN
    RETURN;
  END IF;

  RETURN QUERY EXECUTE format($query$
    WITH _geoms AS (
      SELECT g.val AS id, g.geom AS geom
      FROM UNNEST($1) AS g
    )
    SELECT _geoms.id::INT, %s
    FROM %s, _geoms
    WHERE %s
      AND %s
    GROUP BY _geoms.id
    ORDER BY _geoms.id
  $query$, colspecs, tables, obs_wheres, user_wheres)
  USING geomvals;
  RETURN;
END;
$$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetCategory(
geom geometry(Geometry, 4326),
category_id TEXT,

View File

@ -298,7 +298,7 @@ BEGIN
) SELECT available_geoms.*, score, numtiles, notnull_percent, numgeoms,
percentfill, estnumgeoms, meanmediansize
FROM available_geoms, scores
WHERE available_geoms.geom_id = scores.geom_id
WHERE available_geoms.geom_id = scores.column_id
$string$, geom_clause)
USING numer_id, denom_id, timespan, filter_tags, bounds;
RETURN;
@ -416,11 +416,12 @@ $$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION cdb_observatory._OBS_GetGeometryScores(
bounds Geometry(Geometry, 4326) DEFAULT NULL,
filter_geom_ids TEXT[] DEFAULT NULL,
desired_num_geoms INTEGER DEFAULT 3000
desired_num_geoms INTEGER DEFAULT NULL
) RETURNS TABLE (
score NUMERIC,
numtiles BIGINT,
geom_id TEXT,
table_id TEXT,
column_id TEXT,
notnull_percent NUMERIC,
numgeoms NUMERIC,
percentfill NUMERIC,
@ -428,6 +429,9 @@ CREATE OR REPLACE FUNCTION cdb_observatory._OBS_GetGeometryScores(
meanmediansize NUMERIC
) AS $$
BEGIN
IF desired_num_geoms IS NULL THEN
desired_num_geoms := 3000;
END IF;
filter_geom_ids := COALESCE(filter_geom_ids, (ARRAY[])::TEXT[]);
-- Very complex geometries simply fail. For a boundary check, we can
-- comfortably get away with the simplicity of an envelope
@ -446,39 +450,33 @@ BEGIN
AND (column_id = ANY($2) OR cardinality($2) = 0)
), clipped_geom_countagg AS (
SELECT column_id, table_id
, ST_CountAgg(clipped_tile, 1, True)::Numeric notnull_pixels -- -10
, BOOL_AND(ST_BandIsNoData(clipped_tile, 1)) nodata
, ST_CountAgg(clipped_tile, 1, False)::Numeric pixels -- -10
FROM clipped_geom
GROUP BY column_id, table_id
), clipped_geom_reagg AS (
SELECT COUNT(*)::BIGINT cnt, a.column_id,
SELECT COUNT(*)::BIGINT cnt, a.column_id, a.table_id,
cdb_observatory.FIRST(nodata) first_nodata,
cdb_observatory.FIRST(pixels) first_pixel,
cdb_observatory.FIRST(notnull_pixels) first_notnull_pixel,
cdb_observatory.FIRST(tile) first_tile,
(ST_SummaryStatsAgg(clipped_tile, 1, True)).sum::Numeric sum_geoms, -- ND
(ST_SummaryStatsAgg(clipped_tile, 2, True)).mean::Numeric / 255 mean_fill --ND
(ST_SummaryStatsAgg(clipped_tile, 1, False)).sum::Numeric sum_geoms, -- ND
(ST_SummaryStatsAgg(clipped_tile, 2, False)).mean::Numeric / 255 mean_fill --ND
FROM clipped_geom_countagg a, clipped_geom b
WHERE a.table_id = b.table_id
AND a.column_id = b.column_id
GROUP BY a.column_id, a.table_id
), final AS (
SELECT
cnt, column_id
, (CASE WHEN first_notnull_pixel > 0
THEN first_notnull_pixel / first_pixel
ELSE 1
END)::Numeric
AS notnull_percent
, (CASE WHEN first_notnull_pixel > 0
cnt, table_id, column_id
, NULL::Numeric AS notnull_percent
, (CASE WHEN first_nodata IS FALSE
THEN sum_geoms
ELSE COALESCE(ST_Value(first_tile, 1, ST_PointOnSurface($1)), 0)
* (ST_Area($1) / ST_Area(ST_PixelAsPolygon(first_tile, 0, 0))
* first_pixel) -- -20
END)::Numeric
AS numgeoms
, (CASE WHEN first_notnull_pixel > 0
, (CASE WHEN first_nodata IS FALSE
THEN mean_fill
ELSE COALESCE(ST_Value(first_tile, 2, ST_PointOnSurface($1))::Numeric / 255, 0) -- -2
END)::Numeric

View File

@ -566,6 +566,8 @@ BEGIN
geom_ct.column_id = geom_c.id AND
geom_c.type ILIKE 'geometry' AND
geom_c.id = '%s'
ORDER BY timespan DESC
LIMIT 1
$string$, boundary_id, boundary_id);
RETURN;
-- AND geom_t.timespan = '%s' <-- put in requested year

View File

@ -9,18 +9,6 @@ t
obs_get_median_income_at_null_island
t
(1 row)
obs_getpoints_for_test_point_value|obs_getpoints_for_test_point_name|obs_getpoints_for_test_point_tablename|obs_getpoints_for_test_point_aggregate|obs_getpoints_for_test_point_type|obs_getpoints_for_test_point_description
t|t|t|t|t|t
(1 row)
obs_getpoints_for_null_island
t
(1 row)
obs_getpolygons_for_test_point
t
(1 row)
obs_getpolygons_for_null_island
t
(1 row)
test_point_segmentation
t
(1 row)

View File

@ -114,7 +114,7 @@ t
_obs_getavailablegeometries_foobarbaz_denom_not_in_usa_bg
t
(1 row)
_obs_getavailablegeometries_bg_2014
_obs_getavailablegeometries_bg_2015
t
(1 row)
_obs_getavailablegeometries_bg_not_1996

View File

@ -15,16 +15,17 @@ DROP TABLE IF EXISTS observatory.obs_meta_timespan;
DROP TABLE IF EXISTS observatory.obs_column_table_tile;
DROP TABLE IF EXISTS observatory.obs_column_table_tile_simple;
DROP TABLE IF EXISTS observatory.obs_fcd4e4f5610f6764973ef8c0c215b2e80bec8963;
DROP TABLE IF EXISTS observatory.obs_c6fb99c47d61289fbb8e561ff7773799d3fcc308;
DROP TABLE IF EXISTS observatory.obs_6c1309a64d8f3e6986061f4d1ca7b57743e75e74;
DROP TABLE IF EXISTS observatory.obs_0310c639744a2014bb1af82709228f05b59e7d3d;
DROP TABLE IF EXISTS observatory.obs_7615e8622a68bfc5fe37c69c9880edfb40250103;
DROP TABLE IF EXISTS observatory.obs_d39f7fe5959891c8296490d83c22ded31c54af13;
DROP TABLE IF EXISTS observatory.obs_1babf5a26a1ecda5fb74963e88408f71d0364b81;
DROP TABLE IF EXISTS observatory.obs_b393b5b88c6adda634b2071a8005b03c551b609a;
DROP TABLE IF EXISTS observatory.obs_78fb6c1d6ff6505225175922c2c389ce48d7632c;
DROP TABLE IF EXISTS observatory.obs_65f29658e096ca1485bf683f65fdbc9f05ec3c5d;
DROP TABLE IF EXISTS observatory.obs_144e8b4f906885b2e057ac4842644a553ae49c6e;
DROP TABLE IF EXISTS observatory.obs_a01cd5d8ccaa6531cef715071e9307e6b1987ec3;
DROP TABLE IF EXISTS observatory.obs_fc050f0b8673cfe3c6aa1040f749eb40975691b7;
DROP TABLE IF EXISTS observatory.obs_1a098da56badf5f32e336002b0a81708c40d29cd;
DROP TABLE IF EXISTS observatory.obs_1ea93bbc109c87c676b3270789dacf7a1430db6c;
DROP TABLE IF EXISTS observatory.obs_b393b5b88c6adda634b2071a8005b03c551b609a;
DROP TABLE IF EXISTS observatory.obs_87a814e485deabe3b12545a537f693d16ca702c2;
DROP TABLE IF EXISTS observatory.obs_c4411eba732408d47d73281772dbf03d60645dec;
DROP TABLE IF EXISTS observatory.obs_1746e37b7cd28cb131971ea4187d42d71f09c5f3;
DROP TABLE IF EXISTS observatory.obs_a01cd5d8ccaa6531cef715071e9307e6b1987ec3;

File diff suppressed because one or more lines are too long

View File

@ -33,67 +33,6 @@ WITH result as (
) select expected = 0 as OBS_Get_median_income_at_null_island
from result;
-- OBS_GetPoints
-- obs_getpoints
-- --------------------
-- {4809.33511352425}
-- SELECT
-- (cdb_observatory._OBS_GetPoints(
-- cdb_observatory._TestPoint(),
-- 'obs_c6fb99c47d61289fbb8e561ff7773799d3fcc308'::text, -- block groups (see _obs_geomtable)
-- (Array['{"colname":"total_pop","tablename":"obs_1a098da56badf5f32e336002b0a81708c40d29cd","aggregate":"sum","name":"Total Population","type":"Numeric","description":"The total number of all people living in a given geographic area. This is a very useful catch-all denominator when calculating rates."}'::json])
-- ))[1]::text = '{"value":10923.093200390833950,"name":"Total Population","tablename":"obs_1a098da56badf5f32e336002b0a81708c40d29cd","aggregate":"sum","type":"Numeric","description":"The total number of all people living in a given geographic area. This is a very useful catch-all denominator when calculating rates."}'
-- as OBS_GetPoints_for_test_point;
WITH cte As (
SELECT
(cdb_observatory._OBS_GetPoints(
cdb_observatory._TestPoint(),
'obs_c6fb99c47d61289fbb8e561ff7773799d3fcc308'::text, -- block groups (see _obs_geomtable)
(Array['{"colname":"total_pop","tablename":"obs_1a098da56badf5f32e336002b0a81708c40d29cd","aggregate":"sum","name":"Total Population","type":"Numeric","description":"The total number of all people living in a given geographic area. This is a very useful catch-all denominator when calculating rates."}'::json])
))[1]
as OBS_GetPoints_for_test_point)
SELECT
(abs((OBS_GetPoints_for_test_point ->> 'value')::numeric - 10923.093200390833950) / 10923.093200390833950) < 0.001 As OBS_GetPoints_for_test_point_value,
(OBS_GetPoints_for_test_point ->> 'name') = 'Total Population' As OBS_GetPoints_for_test_point_name,
(OBS_GetPoints_for_test_point ->> 'tablename') = 'obs_1a098da56badf5f32e336002b0a81708c40d29cd' As OBS_GetPoints_for_test_point_tablename,
(OBS_GetPoints_for_test_point ->> 'aggregate') = 'sum' As OBS_GetPoints_for_test_point_aggregate,
(OBS_GetPoints_for_test_point ->> 'type') = 'Numeric' As OBS_GetPoints_for_test_point_type,
(OBS_GetPoints_for_test_point ->> 'description') = 'The total number of all people living in a given geographic area. This is a very useful catch-all denominator when calculating rates.' As OBS_GetPoints_for_test_point_description
FROM cte;
-- what happens at null island
SELECT
(cdb_observatory._OBS_GetPoints(
ST_SetSRID(ST_Point(0, 0), 4326),
'obs_c6fb99c47d61289fbb8e561ff7773799d3fcc308'::text, -- see example in obs_geomtable
(Array['{"colname":"total_pop","tablename":"obs_1a098da56badf5f32e336002b0a81708c40d29cd","aggregate":"sum","name":"Total Population","type":"Numeric","description":"The total number of all people living in a given geographic area. This is a very useful catch-all denominator when calculating rates."}'::json])
))[1]::text is null
as OBS_GetPoints_for_null_island;
-- OBS_GetPolygons
-- obs_getpolygons
-- --------------------
-- {12996.8172420752}
SELECT
(cdb_observatory._OBS_GetPolygons(
cdb_observatory._TestArea(),
'obs_c6fb99c47d61289fbb8e561ff7773799d3fcc308'::text, -- see example in obs_geomtable
Array['{"colname":"total_pop","tablename":"obs_1a098da56badf5f32e336002b0a81708c40d29cd","aggregate":"sum","name":"Total Population","type":"Numeric","description":"The total number of all people living in a given geographic area. This is a very useful catch-all denominator when calculating rates."}'::json]
))[1]::text = '{"value":12327.3133495107,"name":"Total Population","tablename":"obs_1a098da56badf5f32e336002b0a81708c40d29cd","aggregate":"sum","type":"Numeric","description":"The total number of all people living in a given geographic area. This is a very useful catch-all denominator when calculating rates."}'
as OBS_GetPolygons_for_test_point;
-- see what happens around null island
SELECT
((cdb_observatory._OBS_GetPolygons(
ST_Buffer(ST_SetSRID(ST_Point(0, 0), 4326)::geography, 500)::geometry,
'obs_c6fb99c47d61289fbb8e561ff7773799d3fcc308'::text, -- see example in obs_geomtable
Array['{"colname":"total_pop","tablename":"obs_1a098da56badf5f32e336002b0a81708c40d29cd","aggregate":"sum","name":"Total Population","type":"Numeric","description":"The total number of all people living in a given geographic area. This is a very useful catch-all denominator when calculating rates."}'::json])
)[1]->>'value') is null
as OBS_GetPolygons_for_null_island;
SELECT cdb_observatory.OBS_GetSegmentSnapshot(
cdb_observatory._TestPoint(),
'us.census.tiger.census_tract'
@ -196,7 +135,7 @@ SELECT (abs(cdb_observatory.OBS_GetMeasure(
-- Poly-based OBS_GetMeasure with denominator normalization
SELECT abs(cdb_observatory.OBS_GetMeasure(
cdb_observatory._TestArea(),
'us.census.acs.B01001002', 'denominator') - 0.49026340444793965457) / 0.49026340444793965457 < 0.001 As OBS_GetMeasure_total_male_poly_denominator;
'us.census.acs.B01001002', 'denominator', null, '2010 - 2014') - 0.49026340444793965457) / 0.49026340444793965457 < 0.001 As OBS_GetMeasure_total_male_poly_denominator;
-- Poly-based OBS_GetMeasure with one very bad geom
SELECT abs(cdb_observatory.OBS_GetMeasure(

View File

@ -278,9 +278,9 @@ AS _obs_getavailablegeometries_foobarbaz_denom_not_in_usa_bg;
SELECT 'us.census.tiger.block_group' IN (SELECT geom_id
FROM cdb_observatory.OBS_GetAvailableGeometries(
ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326),
NULL, NULL, NULL, '2014'
NULL, NULL, NULL, '2015'
) WHERE valid_timespan = True)
AS _obs_getavailablegeometries_bg_2014;
AS _obs_getavailablegeometries_bg_2015;
SELECT 'us.census.tiger.block_group' NOT IN (SELECT geom_id
FROM cdb_observatory.OBS_GetAvailableGeometries(
@ -350,126 +350,140 @@ AS _obs_getavailablegeometries_foobarbaz_denom_not_in_2010_2014;
-- _OBS_GetGeometryScores tests
--
SELECT ARRAY_AGG(geom_id ORDER BY score DESC) =
SELECT ARRAY_AGG(column_id ORDER BY score DESC) =
ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract',
'us.census.tiger.county', 'us.census.tiger.zcta5']
AS _obs_geometryscores_500m_buffer
FROM cdb_observatory._OBS_GetGeometryScores(
ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 500)::Geometry(Geometry, 4326),
ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract',
'us.census.tiger.county', 'us.census.tiger.zcta5']);
'us.census.tiger.county', 'us.census.tiger.zcta5'])
WHERE table_id LIKE '%2015%';
SELECT ARRAY_AGG(geom_id ORDER BY score DESC) =
SELECT ARRAY_AGG(column_id ORDER BY score DESC) =
ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract',
'us.census.tiger.county', 'us.census.tiger.zcta5']
AS _obs_geometryscores_5km_buffer
FROM cdb_observatory._OBS_GetGeometryScores(
ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 5000)::Geometry(Geometry, 4326),
ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract',
'us.census.tiger.county', 'us.census.tiger.zcta5']);
'us.census.tiger.county', 'us.census.tiger.zcta5'])
WHERE table_id LIKE '%2015%';
SELECT ARRAY_AGG(geom_id ORDER BY score DESC) =
SELECT ARRAY_AGG(column_id ORDER BY score DESC) =
ARRAY['us.census.tiger.census_tract', 'us.census.tiger.block_group',
'us.census.tiger.zcta5', 'us.census.tiger.county']
AS _obs_geometryscores_50km_buffer
FROM cdb_observatory._OBS_GetGeometryScores(
ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 50000)::Geometry(Geometry, 4326),
ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract',
'us.census.tiger.zcta5', 'us.census.tiger.county']);
'us.census.tiger.zcta5', 'us.census.tiger.county'])
WHERE table_id LIKE '%2015%';
SELECT ARRAY_AGG(geom_id ORDER BY score DESC) =
SELECT ARRAY_AGG(column_id ORDER BY score DESC) =
ARRAY[ 'us.census.tiger.zcta5', 'us.census.tiger.census_tract',
'us.census.tiger.county', 'us.census.tiger.block_group']
AS _obs_geometryscores_500km_buffer
FROM cdb_observatory._OBS_GetGeometryScores(
ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 500000)::Geometry(Geometry, 4326),
ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract',
'us.census.tiger.zcta5', 'us.census.tiger.county']);
'us.census.tiger.zcta5', 'us.census.tiger.county'])
WHERE table_id LIKE '%2015%';
SELECT ARRAY_AGG(geom_id ORDER BY score DESC) =
SELECT ARRAY_AGG(column_id ORDER BY score DESC) =
ARRAY['us.census.tiger.county', 'us.census.tiger.zcta5',
'us.census.tiger.census_tract', 'us.census.tiger.block_group']
AS _obs_geometryscores_2500km_buffer
FROM cdb_observatory._OBS_GetGeometryScores(
ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 2500000)::Geometry(Geometry, 4326),
ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract',
'us.census.tiger.zcta5', 'us.census.tiger.county']);
'us.census.tiger.zcta5', 'us.census.tiger.county'])
WHERE table_id LIKE '%2015%';
SELECT JSON_Object_Agg(geom_id, numgeoms::int ORDER BY numgeoms DESC)::Text
SELECT JSON_Object_Agg(column_id, numgeoms::int ORDER BY numgeoms DESC)::Text
= '{ "us.census.tiger.block_group" : 9, "us.census.tiger.census_tract" : 3, "us.census.tiger.zcta5" : 0, "us.census.tiger.county" : 0 }'
AS _obs_geometryscores_numgeoms_500m_buffer
FROM cdb_observatory._OBS_GetGeometryScores(
ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 500)::Geometry(Geometry, 4326),
ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract',
'us.census.tiger.zcta5', 'us.census.tiger.county']);
'us.census.tiger.zcta5', 'us.census.tiger.county'])
WHERE table_id LIKE '%2015%';
SELECT JSON_Object_Agg(geom_id, numgeoms::int ORDER BY numgeoms DESC)::Text =
SELECT JSON_Object_Agg(column_id, numgeoms::int ORDER BY numgeoms DESC)::Text =
'{ "us.census.tiger.block_group" : 899, "us.census.tiger.census_tract" : 328, "us.census.tiger.zcta5" : 45, "us.census.tiger.county" : 1 }'
AS _obs_geometryscores_numgeoms_5km_buffer
FROM cdb_observatory._OBS_GetGeometryScores(
ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 5000)::Geometry(Geometry, 4326),
ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract',
'us.census.tiger.zcta5', 'us.census.tiger.county']);
'us.census.tiger.zcta5', 'us.census.tiger.county'])
WHERE table_id LIKE '%2015%';
SELECT JSON_Object_Agg(geom_id, numgeoms::int ORDER BY numgeoms DESC)::Text =
SELECT JSON_Object_Agg(column_id, numgeoms::int ORDER BY numgeoms DESC)::Text =
'{ "us.census.tiger.block_group" : 12112, "us.census.tiger.census_tract" : 3792, "us.census.tiger.zcta5" : 550, "us.census.tiger.county" : 14 }'
AS _obs_geometryscores_numgeoms_50km_buffer
FROM cdb_observatory._OBS_GetGeometryScores(
ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 50000)::Geometry(Geometry, 4326),
ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract',
'us.census.tiger.zcta5', 'us.census.tiger.county']);
'us.census.tiger.zcta5', 'us.census.tiger.county'])
WHERE table_id LIKE '%2015%';
SELECT JSON_Object_Agg(geom_id, numgeoms::int ORDER BY numgeoms DESC)::Text =
'{ "us.census.tiger.block_group" : 48420, "us.census.tiger.census_tract" : 15774, "us.census.tiger.zcta5" : 6533, "us.census.tiger.county" : 304 }'
SELECT JSON_Object_Agg(column_id, numgeoms::int ORDER BY numgeoms DESC)::Text =
'{ "us.census.tiger.block_group" : 48420, "us.census.tiger.census_tract" : 15775, "us.census.tiger.zcta5" : 6533, "us.census.tiger.county" : 304 }'
AS _obs_geometryscores_numgeoms_500km_buffer
FROM cdb_observatory._OBS_GetGeometryScores(
ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 500000)::Geometry(Geometry, 4326),
ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract',
'us.census.tiger.zcta5', 'us.census.tiger.county']);
'us.census.tiger.zcta5', 'us.census.tiger.county'])
WHERE table_id LIKE '%2015%';
SELECT JSON_Object_Agg(geom_id, numgeoms::int ORDER BY numgeoms DESC)::Text =
'{ "us.census.tiger.block_group" : 165475, "us.census.tiger.census_tract" : 55128, "us.census.tiger.zcta5" : 26499, "us.census.tiger.county" : 2622 }'
SELECT JSON_Object_Agg(column_id, numgeoms::int ORDER BY numgeoms DESC)::Text =
'{ "us.census.tiger.block_group" : 169119, "us.census.tiger.census_tract" : 56380, "us.census.tiger.zcta5" : 26498, "us.census.tiger.county" : 2704 }'
AS _obs_geometryscores_numgeoms_2500km_buffer
FROM cdb_observatory._OBS_GetGeometryScores(
ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 2500000)::Geometry(Geometry, 4326),
ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract',
'us.census.tiger.zcta5', 'us.census.tiger.county']);
'us.census.tiger.zcta5', 'us.census.tiger.county'])
WHERE table_id LIKE '%2015%';
SELECT ARRAY_AGG(geom_id ORDER BY score DESC) =
ARRAY['us.census.tiger.county', 'us.census.tiger.zcta5',
'us.census.tiger.census_tract', 'us.census.tiger.block_group']
SELECT ARRAY_AGG(column_id ORDER BY score DESC) =
ARRAY['us.census.tiger.county', 'us.census.tiger.census_tract',
'us.census.tiger.zcta5', 'us.census.tiger.block_group']
AS _obs_geometryscores_500km_buffer_50_geoms
FROM cdb_observatory._OBS_GetGeometryScores(
ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 50000)::Geometry(Geometry, 4326),
ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract',
'us.census.tiger.zcta5', 'us.census.tiger.county'], 50);
'us.census.tiger.zcta5', 'us.census.tiger.county'], 50)
WHERE table_id LIKE '%2015%';
SELECT ARRAY_AGG(geom_id ORDER BY score DESC)
SELECT ARRAY_AGG(column_id ORDER BY score DESC)
= ARRAY['us.census.tiger.zcta5', 'us.census.tiger.census_tract',
'us.census.tiger.block_group', 'us.census.tiger.county']
AS _obs_geometryscores_500km_buffer_500_geoms
FROM cdb_observatory._OBS_GetGeometryScores(
ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 50000)::Geometry(Geometry, 4326),
ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract',
'us.census.tiger.zcta5', 'us.census.tiger.county'], 500);
'us.census.tiger.zcta5', 'us.census.tiger.county'], 500)
WHERE table_id LIKE '%2015%';
SELECT ARRAY_AGG(geom_id ORDER BY score DESC) =
SELECT ARRAY_AGG(column_id ORDER BY score DESC) =
ARRAY['us.census.tiger.census_tract', 'us.census.tiger.block_group',
'us.census.tiger.zcta5', 'us.census.tiger.county']
AS _obs_geometryscores_500km_buffer_2500_geoms
FROM cdb_observatory._OBS_GetGeometryScores(
ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 50000)::Geometry(Geometry, 4326),
ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract',
'us.census.tiger.zcta5', 'us.census.tiger.county'], 2500);
'us.census.tiger.zcta5', 'us.census.tiger.county'], 2500)
WHERE table_id LIKE '%2015%';
SELECT ARRAY_AGG(geom_id ORDER BY score DESC) =
SELECT ARRAY_AGG(column_id ORDER BY score DESC) =
ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract',
'us.census.tiger.zcta5', 'us.census.tiger.county']
AS _obs_geometryscores_500km_buffer_25000_geoms
FROM cdb_observatory._OBS_GetGeometryScores(
ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 50000)::Geometry(Geometry, 4326),
ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract',
'us.census.tiger.zcta5', 'us.census.tiger.county'], 25000);
'us.census.tiger.zcta5', 'us.census.tiger.county'], 25000)
WHERE table_id LIKE '%2015%';
--
-- OBS_LegacyBuilderMetadata tests

View File

@ -73,7 +73,7 @@ SELECT cdb_observatory.OBS_GetBoundaryId(
SELECT cdb_observatory.OBS_GetBoundaryId(
cdb_observatory._TestPoint(),
'us.census.tiger.county',
'2014'
'2015'
) = '36047'::text As OBS_GetBoundaryId_cartodb_county_with_year;
-- should give back null since there is not a census tract at null island
@ -335,7 +335,7 @@ FROM (
SELECT
geoid_colname = 'geoid' As geoid_name_matches,
target_table = 'obs_fc050f0b8673cfe3c6aa1040f749eb40975691b7' As table_name_matches,
target_table = 'obs_87a814e485deabe3b12545a537f693d16ca702c2' As table_name_matches,
geom_colname = 'the_geom' As geom_name_matches
FROM cdb_observatory._OBS_GetGeometryMetadata('us.census.tiger.census_tract')
As m(geoid_colname, target_table, geom_colname);

View File

@ -23,11 +23,11 @@ for q in (
ST_Translate(the_geom, -0.1, 0.1) offset_geom,
geom_refs AS name
FROM (SELECT * FROM {schema}OBS_GetBoundariesByGeometry(
st_makeenvelope(-74.05437469482422,40.66319159533881,
-73.81885528564453,40.745696344339564, 4326),
st_makeenvelope(-74.1, 40.5,
-73.8, 40.9, 4326),
'us.census.tiger.census_tract_clipped')) foo
ORDER BY ST_NPoints(the_geom) ASC
LIMIT 500''',
LIMIT 1000''',
'DROP TABLE IF EXISTS obs_perftest_complex',
'''CREATE TABLE obs_perftest_complex (cartodb_id SERIAL PRIMARY KEY,
point GEOMETRY,
@ -79,16 +79,17 @@ for q in (
ARGS = {
('OBS_GetMeasureByID', None): "name, 'us.census.acs.B01001002', '{}'",
('OBS_GetMeasure', 'predenominated'): "{}, 'us.census.acs.B01003001'",
('OBS_GetMeasure', 'area'): "{}, 'us.census.acs.B01001002', 'area'",
('OBS_GetMeasure', 'denominator'): "{}, 'us.census.acs.B01001002', 'denominator'",
('OBS_GetCategory', None): "{}, 'us.census.spielman_singleton_segments.X10'",
('OBS_GetMeasure', 'predenominated'): "{}, 'us.census.acs.B01003001', null, {}",
('OBS_GetMeasure', 'area'): "{}, 'us.census.acs.B01001002', 'area', {}",
('OBS_GetMeasure', 'denominator'): "{}, 'us.census.acs.B01001002', 'denominator', {}",
('OBS_GetCategory', None): "{}, 'us.census.spielman_singleton_segments.X10', {}",
('_OBS_GetGeometryScores', None): "{}, NULL"
}
def record(params, results):
sha = os.environ['OBS_EXTENSION_SHA']
msg = os.environ.get('OBS_EXTENSION_MSG')
fpath = os.path.join(os.environ['OBS_PERFTEST_DIR'], sha + '.json')
if os.path.isfile(fpath):
tests = json.load(open(fpath, 'r'))
@ -126,13 +127,13 @@ def test_getgeometryscores_performance(geom_complexity, api_method, filters, tar
for rows in rownums:
stmt = '''SELECT {schema}{api_method}(geom, {filters}, {target_geoms})
FROM obs_perftest_{complexity}
WHERE cartodb_id < {n}'''.format(
WHERE cartodb_id <= {n}'''.format(
complexity=geom_complexity,
schema='cdb_observatory.' if USE_SCHEMA else '',
api_method=api_method,
filters=filters,
target_geoms=target_geoms,
n=rows+1)
n=rows)
start = time()
query(stmt)
end = time()
@ -153,51 +154,77 @@ def test_getgeometryscores_performance(geom_complexity, api_method, filters, tar
}, results)
@parameterized([
('simple', 'OBS_GetMeasureByID', None, 'us.census.tiger.census_tract'),
('complex', 'OBS_GetMeasureByID', None, 'us.census.tiger.county'),
('simple', 'OBS_GetMeasureByID', None, 'us.census.tiger.census_tract', None),
('complex', 'OBS_GetMeasureByID', None, 'us.census.tiger.county', None),
('simple', 'OBS_GetMeasure', 'predenominated', 'point'),
('simple', 'OBS_GetMeasure', 'predenominated', 'geom'),
('simple', 'OBS_GetMeasure', 'predenominated', 'offset_geom'),
('simple', 'OBS_GetMeasure', 'area', 'point'),
('simple', 'OBS_GetMeasure', 'area', 'geom'),
('simple', 'OBS_GetMeasure', 'area', 'offset_geom'),
('simple', 'OBS_GetMeasure', 'denominator', 'point'),
('simple', 'OBS_GetMeasure', 'denominator', 'geom'),
('simple', 'OBS_GetMeasure', 'denominator', 'offset_geom'),
('simple', 'OBS_GetCategory', None, 'point'),
('simple', 'OBS_GetCategory', None, 'geom'),
('simple', 'OBS_GetCategory', None, 'offset_geom'),
('simple', 'OBS_GetMeasure', 'predenominated', 'point', 'NULL'),
('simple', 'OBS_GetMeasure', 'predenominated', 'geom', 'NULL'),
('simple', 'OBS_GetMeasure', 'predenominated', 'offset_geom', 'NULL'),
('simple', 'OBS_GetMeasure', 'area', 'point', 'NULL'),
('simple', 'OBS_GetMeasure', 'area', 'geom', 'NULL'),
('simple', 'OBS_GetMeasure', 'area', 'offset_geom', 'NULL'),
('simple', 'OBS_GetMeasure', 'denominator', 'point', 'NULL'),
('simple', 'OBS_GetMeasure', 'denominator', 'geom', 'NULL'),
('simple', 'OBS_GetMeasure', 'denominator', 'offset_geom', 'NULL'),
('simple', 'OBS_GetCategory', None, 'point', 'NULL'),
('simple', 'OBS_GetCategory', None, 'geom', 'NULL'),
('simple', 'OBS_GetCategory', None, 'offset_geom', 'NULL'),
('complex', 'OBS_GetMeasure', 'predenominated', 'point'),
('complex', 'OBS_GetMeasure', 'predenominated', 'geom'),
('complex', 'OBS_GetMeasure', 'predenominated', 'offset_geom'),
('complex', 'OBS_GetMeasure', 'area', 'point'),
('complex', 'OBS_GetMeasure', 'area', 'geom'),
('complex', 'OBS_GetMeasure', 'area', 'offset_geom'),
('complex', 'OBS_GetMeasure', 'denominator', 'point'),
('complex', 'OBS_GetMeasure', 'denominator', 'geom'),
('complex', 'OBS_GetMeasure', 'denominator', 'offset_geom'),
('complex', 'OBS_GetCategory', None, 'point'),
('complex', 'OBS_GetCategory', None, 'geom'),
('complex', 'OBS_GetCategory', None, 'offset_geom'),
('simple', 'OBS_GetMeasure', 'predenominated', 'point', "'us.census.tiger.census_tract'"),
('simple', 'OBS_GetMeasure', 'predenominated', 'geom', "'us.census.tiger.census_tract'"),
('simple', 'OBS_GetMeasure', 'predenominated', 'offset_geom', "'us.census.tiger.census_tract'"),
('simple', 'OBS_GetMeasure', 'area', 'point', "'us.census.tiger.census_tract'"),
('simple', 'OBS_GetMeasure', 'area', 'geom', "'us.census.tiger.census_tract'"),
('simple', 'OBS_GetMeasure', 'area', 'offset_geom', "'us.census.tiger.census_tract'"),
('simple', 'OBS_GetMeasure', 'denominator', 'point', "'us.census.tiger.census_tract'"),
('simple', 'OBS_GetMeasure', 'denominator', 'geom', "'us.census.tiger.census_tract'"),
('simple', 'OBS_GetMeasure', 'denominator', 'offset_geom', "'us.census.tiger.census_tract'"),
('simple', 'OBS_GetCategory', None, 'point', "'us.census.tiger.census_tract'"),
('simple', 'OBS_GetCategory', None, 'geom', "'us.census.tiger.census_tract'"),
('simple', 'OBS_GetCategory', None, 'offset_geom', "'us.census.tiger.census_tract'"),
('complex', 'OBS_GetMeasure', 'predenominated', 'point', 'NULL'),
('complex', 'OBS_GetMeasure', 'predenominated', 'geom', 'NULL'),
('complex', 'OBS_GetMeasure', 'predenominated', 'offset_geom', 'NULL'),
('complex', 'OBS_GetMeasure', 'area', 'point', 'NULL'),
('complex', 'OBS_GetMeasure', 'area', 'geom', 'NULL'),
('complex', 'OBS_GetMeasure', 'area', 'offset_geom', 'NULL'),
('complex', 'OBS_GetMeasure', 'denominator', 'point', 'NULL'),
('complex', 'OBS_GetMeasure', 'denominator', 'geom', 'NULL'),
('complex', 'OBS_GetMeasure', 'denominator', 'offset_geom', 'NULL'),
('complex', 'OBS_GetCategory', None, 'point', 'NULL'),
('complex', 'OBS_GetCategory', None, 'geom', 'NULL'),
('complex', 'OBS_GetCategory', None, 'offset_geom', 'NULL'),
('complex', 'OBS_GetMeasure', 'predenominated', 'point', "'us.census.tiger.county'"),
('complex', 'OBS_GetMeasure', 'predenominated', 'geom', "'us.census.tiger.county'"),
('complex', 'OBS_GetMeasure', 'predenominated', 'offset_geom', "'us.census.tiger.county'"),
('complex', 'OBS_GetMeasure', 'area', 'point', "'us.census.tiger.county'"),
('complex', 'OBS_GetMeasure', 'area', 'geom', "'us.census.tiger.county'"),
('complex', 'OBS_GetMeasure', 'area', 'offset_geom', "'us.census.tiger.county'"),
('complex', 'OBS_GetMeasure', 'denominator', 'point', "'us.census.tiger.county'"),
('complex', 'OBS_GetMeasure', 'denominator', 'geom', "'us.census.tiger.county'"),
('complex', 'OBS_GetMeasure', 'denominator', 'offset_geom', "'us.census.tiger.county'"),
('complex', 'OBS_GetCategory', None, 'point', "'us.census.tiger.census_tract'"),
('complex', 'OBS_GetCategory', None, 'geom', "'us.census.tiger.census_tract'"),
('complex', 'OBS_GetCategory', None, 'offset_geom', "'us.census.tiger.census_tract'"),
])
def test_getmeasure_performance(geom_complexity, api_method, normalization, geom):
print api_method, geom_complexity, normalization, geom
def test_getmeasure_performance(geom_complexity, api_method, normalization, geom, boundary):
print api_method, geom_complexity, normalization, geom, boundary
col = 'measure' if 'measure' in api_method.lower() else 'category'
results = []
rownums = (1, 5, 10, ) if geom_complexity == 'complex' else (5, 25, 50 )
rownums = (1, 5, 10, ) if geom_complexity == 'complex' else (5, 25, 50, )
for rows in rownums:
stmt = '''UPDATE obs_perftest_{complexity}
SET {col} = {schema}{api_method}({args})
WHERE cartodb_id < {n}'''.format(
WHERE cartodb_id <= {n}'''.format(
col=col,
complexity=geom_complexity,
schema='cdb_observatory.' if USE_SCHEMA else '',
api_method=api_method,
args=ARGS[api_method, normalization].format(geom),
n=rows+1)
args=ARGS[api_method, normalization].format(geom, boundary),
n=rows)
start = time()
query(stmt)
end = time()
@ -214,5 +241,106 @@ def test_getmeasure_performance(geom_complexity, api_method, normalization, geom
'geom_complexity': geom_complexity,
'api_method': api_method,
'normalization': normalization,
'boundary': boundary,
'geom': geom
}, results)
@parameterized([
('simple', 'predenominated', 'point', 'null'),
('simple', 'predenominated', 'geom', 'null'),
('simple', 'predenominated', 'offset_geom', 'null'),
('simple', 'area', 'point', 'null'),
('simple', 'area', 'geom', 'null'),
('simple', 'area', 'offset_geom', 'null'),
('simple', 'denominator', 'point', 'null'),
('simple', 'denominator', 'geom', 'null'),
('simple', 'denominator', 'offset_geom', 'null'),
('simple', 'predenominated', 'point', "'us.census.tiger.census_tract'"),
('simple', 'predenominated', 'geom', "'us.census.tiger.census_tract'"),
('simple', 'predenominated', 'offset_geom', "'us.census.tiger.census_tract'"),
('simple', 'area', 'point', "'us.census.tiger.census_tract'"),
('simple', 'area', 'geom', "'us.census.tiger.census_tract'"),
('simple', 'area', 'offset_geom', "'us.census.tiger.census_tract'"),
('simple', 'denominator', 'point', "'us.census.tiger.census_tract'"),
('simple', 'denominator', 'geom', "'us.census.tiger.census_tract'"),
('simple', 'denominator', 'offset_geom', "'us.census.tiger.census_tract'"),
('complex', 'predenominated', 'point', 'null'),
('complex', 'predenominated', 'geom', 'null'),
('complex', 'predenominated', 'offset_geom', 'null'),
('complex', 'area', 'point', 'null'),
('complex', 'area', 'geom', 'null'),
('complex', 'area', 'offset_geom', 'null'),
('complex', 'denominator', 'point', 'null'),
('complex', 'denominator', 'geom', 'null'),
('complex', 'denominator', 'offset_geom', 'null'),
('complex', 'predenominated', 'point', "'us.census.tiger.county'"),
('complex', 'predenominated', 'geom', "'us.census.tiger.county'"),
('complex', 'predenominated', 'offset_geom', "'us.census.tiger.county'"),
('complex', 'area', 'point', "'us.census.tiger.county'"),
('complex', 'area', 'geom', "'us.census.tiger.county'"),
('complex', 'area', 'offset_geom', "'us.census.tiger.county'"),
('complex', 'denominator', 'point', "'us.census.tiger.county'"),
('complex', 'denominator', 'geom', "'us.census.tiger.county'"),
('complex', 'denominator', 'offset_geom', "'us.census.tiger.county'"),
])
def test_getmeasure_split_performance(geom_complexity, normalization, geom, boundary):
print geom_complexity, normalization, geom, boundary
results = []
rownums = (1, 5, 10, ) if geom_complexity == 'complex' else (10, 50, 100)
for rows in rownums:
stmt = '''
with data as (
SELECT * FROM {schema}{api_method}datamulti(
(SELECT array_agg(({geom}, cartodb_id)::geomval)
FROM obs_perftest_{complexity}
WHERE cartodb_id <= {n}),
(SELECT {schema}{api_method}metamulti(
(SELECT st_setsrid(st_extent({geom}), 4326)
FROM obs_perftest_{complexity}
WHERE cartodb_id <= {n}),
'[{{
"numer_id": "us.census.acs.B01001002",
"normalization": "{normalization}",
"geom_id": {boundary}
}}]'::JSON
))
)
AS x(cartodb_id INTEGER, measure Numeric))
UPDATE obs_perftest_{complexity}
SET measure = data.measure
FROM data
WHERE obs_perftest_{complexity}.cartodb_id = data.cartodb_id
;
'''.format(
point_or_poly='point' if geom == 'point' else 'polygon',
complexity=geom_complexity,
schema='cdb_observatory.' if USE_SCHEMA else '',
api_method='obs_getmeasure',
normalization=normalization,
geom=geom,
boundary=boundary.replace("'", '"'),
n=rows)
start = time()
query(stmt)
end = time()
qps = (rows / (end - start))
results.append({
'rows': rows,
'qps': qps,
'stmt': stmt
})
print rows, ': ', qps, ' QPS'
if 'OBS_RECORD_TEST' in os.environ:
record({
'geom_complexity': geom_complexity,
'api_method': 'OBS_GetMeasureMeta/OBS_GetMeasureData',
'normalization': normalization,
'boundary': boundary,
'geom': geom
}, results)