keep track of table_id in obs_meta and geometryscores, use obs_getmeasure*multi for obs_getmeasure

This commit is contained in:
John Krauss 2016-12-21 21:53:53 +00:00
parent 24587b7e03
commit d3a57e637c
10 changed files with 6746 additions and 4414 deletions

View File

@ -168,10 +168,11 @@ FIXTURES = [
('us.census.acs.B19001015', 'us.census.tiger.block_group', '2010 - 2014'),
('us.census.acs.B19001016', 'us.census.tiger.block_group', '2010 - 2014'),
('us.census.acs.B19001017', 'us.census.tiger.block_group', '2010 - 2014'),
('us.census.tiger.census_tract', 'us.census.tiger.census_tract', '2015'),
('us.census.tiger.census_tract', 'us.census.tiger.census_tract', '2014'),
('us.census.tiger.block_group', 'us.census.tiger.block_group', '2014'),
('us.census.tiger.zcta5', 'us.census.tiger.zcta5', '2014'),
('us.census.tiger.county', 'us.census.tiger.county', '2014'),
('us.census.tiger.block_group', 'us.census.tiger.block_group', '2015'),
('us.census.tiger.zcta5', 'us.census.tiger.zcta5', '2015'),
('us.census.tiger.county', 'us.census.tiger.county', '2015'),
('us.census.acs.B01001002', 'us.census.tiger.block_group', '2010 - 2014'),
('us.census.acs.B01003001', 'us.census.tiger.census_tract', '2010 - 2014'),
('us.census.acs.B01003001_quantile', 'us.census.tiger.census_tract', '2010 - 2014'),
@ -204,6 +205,7 @@ def dump(cols, tablename, where=''):
' | sed "s:SET search_path.*::" '
' | sed "s:CREATE TABLE :CREATE TABLE observatory.:" '
' | sed "s:ALTER TABLE.*OWNER.*::" '
' | sed "s:SET idle_in_transaction_session_timeout.*::" '
' >> {outfile}'.format(
tablename=tablename,
outfile=OUTFILE_PATH,

View File

@ -361,7 +361,7 @@ BEGIN
(unnest($3))->>'geom_id' geom_id,
(unnest($3))->>'timespan' timespan
), meta AS (SELECT
id,
id, geom_tid,
numer_aggregate, numer_colname, numer_geomref_colname,
numer_tablename, denom_aggregate, denom_colname, denom_geomref_colname,
denom_tablename, geom_colname, geom_geomref_colname,
@ -409,7 +409,8 @@ BEGIN
'geom_id', meta.geom_id
) metadata
FROM meta, scores
WHERE meta.geom_id = scores.geom_id
WHERE meta.geom_id = scores.column_id
AND meta.geom_tid = scores.table_id
) SELECT JSON_AGG(metadata ORDER BY id)
FROM groups
WHERE timespan_rank <= $4
@ -432,146 +433,150 @@ END;
$$ LANGUAGE plpgsql IMMUTABLE;
-- OBS_GetMeasureData
--
-- Builds and runs one dynamic query that returns a single measure value for
-- the supplied geometry, reading from the observatory.* tables named by the
-- caller.
--
-- Parameters:
--   geom                   geometry to measure; bound to the dynamic query as $1
--   geom_type              'point' or 'polygon' (compared case-sensitively)
--   normalize              'area', 'denominator', or anything else / NULL for
--                          the defaults described below (matched with ILIKE)
--   numer_aggregate        aggregate type of the numerator, e.g. 'sum'
--   numer_colname, numer_geomref_colname, numer_tablename
--                          numerator column, its geometry-reference column and
--                          the table holding both
--   denom_colname, denom_geomref_colname, denom_tablename
--                          same for the denominator; only read when the
--                          'denominated' mode is selected
--   geom_colname, geom_geomref_colname, geom_tablename
--                          boundary geometry column, its reference column and
--                          the table holding both
--
-- Modes (map_type):
--   areaNormalized  - numerator divided by an area expressed as
--                     ST_Area(...::Geography) / 1000000
--   denominated     - numerator divided by the denominator (zero => NULL)
--   predenominated  - raw numerator
--
-- For a point, the first boundary containing it (ST_Within ... LIMIT 1) is
-- used. For a polygon, the input is split with ST_Subdivide and each
-- intersecting boundary contributes in proportion to
-- (intersection area / boundary area).
--
-- Returns: NUMERIC result of the generated query (NULL when nothing matches).
-- Raises:  an exception when a non-summable measure is requested for a
--          polygon without normalization, or when geom_type is unsupported.
--
-- NOTE(review): the function is declared IMMUTABLE although the query it runs
-- reads tables; STABLE looks more accurate - confirm before changing, since
-- volatility is part of the published declaration.
CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetMeasureData(
  geom geometry(Geometry, 4326),
  geom_type TEXT,
  normalize TEXT,
  numer_aggregate TEXT,
  numer_colname TEXT,
  numer_geomref_colname TEXT,
  numer_tablename TEXT,
  denom_colname TEXT,
  denom_geomref_colname TEXT,
  denom_tablename TEXT,
  geom_colname TEXT,
  geom_geomref_colname TEXT,
  geom_tablename TEXT
)
RETURNS NUMERIC
AS $$
DECLARE
  sql TEXT;
  map_type TEXT;
  result NUMERIC;
BEGIN
  -- Resolve the normalization mode. Area normalization is only honoured for
  -- summable measures.
  IF normalize ILIKE 'area' AND numer_aggregate ILIKE 'sum' THEN
    map_type := 'areaNormalized';
  ELSIF normalize ILIKE 'denominator' THEN
    map_type := 'denominated';
  ELSE
    -- defaults: area normalization for point if it's possible and none for
    -- polygon or non-summable point
    IF geom_type = 'point' AND numer_aggregate ILIKE 'sum' THEN
      map_type := 'areaNormalized';
    ELSE
      map_type := 'predenominated';
    END IF;
  END IF;

  -- All identifiers are interpolated with %I so they are quoted safely; the
  -- geometry itself is passed as the bound parameter $1.
  IF geom_type = 'point' THEN
    IF map_type = 'areaNormalized' THEN
      -- Value of the containing boundary divided by that boundary's area.
      sql = format('WITH _geom AS (SELECT ST_Area(geom.%I::Geography) / 1000000 area, geom.%I geom_ref
FROM observatory.%I geom
WHERE ST_Within($1, geom.%I)
LIMIT 1)
SELECT numer.%I / (SELECT area FROM _geom)
FROM observatory.%I numer
WHERE numer.%I = (SELECT geom_ref FROM _geom)',
        geom_colname, geom_geomref_colname, geom_tablename,
        geom_colname, numer_colname, numer_tablename,
        numer_geomref_colname);
    ELSIF map_type = 'denominated' THEN
      -- Numerator over denominator for the containing boundary; NULLIF
      -- avoids a division by zero.
      sql = format('SELECT numer.%I / NULLIF((SELECT denom.%I FROM observatory.%I denom WHERE denom.%I = numer.%I LIMIT 1), 0)
FROM observatory.%I numer
WHERE numer.%I =
(SELECT geom.%I
FROM observatory.%I geom
WHERE ST_Within($1, geom.%I) LIMIT 1)',
        numer_colname, denom_colname, denom_tablename,
        denom_geomref_colname, numer_geomref_colname,
        numer_tablename, numer_geomref_colname,
        geom_geomref_colname, geom_tablename, geom_colname);
    ELSIF map_type = 'predenominated' THEN
      -- Raw value of the containing boundary.
      sql = format('SELECT numer.%I
FROM observatory.%I numer
WHERE numer.%I =
(SELECT geom.%I
FROM observatory.%I geom
WHERE ST_Within($1, geom.%I) LIMIT 1)',
        numer_colname, numer_tablename, numer_geomref_colname,
        geom_geomref_colname, geom_tablename, geom_colname);
    END IF;
  ELSIF geom_type = 'polygon' THEN
    IF map_type = 'areaNormalized' THEN
      -- Overlap-weighted sum of the numerator divided by the input's area.
      sql = format('WITH _subdivided AS (
SELECT ST_Subdivide($1) AS geom
), _geom AS (SELECT SUM(ST_Area(ST_Intersection(s.geom, geom.%I)))
/ ST_Area(cdb_observatory.FIRST(geom.%I)) overlap, geom.%I geom_ref
FROM observatory.%I geom, _subdivided s
WHERE ST_Intersects(s.geom, geom.%I)
GROUP BY geom.%I)
SELECT SUM(numer.%I * (SELECT _geom.overlap FROM _geom WHERE _geom.geom_ref = numer.%I)) /
(ST_Area($1::Geography) / 1000000)
FROM observatory.%I numer
WHERE numer.%I = ANY ((SELECT ARRAY_AGG(geom_ref) FROM _geom)::TEXT[])',
        geom_colname, geom_colname, geom_geomref_colname, geom_tablename,
        geom_colname, geom_geomref_colname, numer_colname,
        numer_geomref_colname, numer_tablename, numer_geomref_colname);
    ELSIF map_type = 'denominated' THEN
      -- Overlap-weighted numerator sum over overlap-weighted denominator sum.
      sql = format('WITH _subdivided AS (
SELECT ST_Subdivide($1) AS geom
), _geom AS (SELECT SUM(ST_Area(ST_Intersection(s.geom, geom.%I)))
/ ST_Area(cdb_observatory.FIRST(geom.%I)) overlap, geom.%I geom_ref
FROM observatory.%I geom, _subdivided s
WHERE ST_Intersects(s.geom, geom.%I)
GROUP BY geom.%I),
_denom AS (SELECT denom.%I, denom.%I geom_ref
FROM observatory.%I denom
WHERE denom.%I = ANY ((SELECT ARRAY_AGG(geom_ref) FROM _geom)::TEXT[]))
SELECT SUM(numer.%I * (SELECT _geom.overlap FROM _geom WHERE _geom.geom_ref = numer.%I)) /
NullIf(SUM((SELECT _denom.%I * (SELECT _geom.overlap
FROM _geom
WHERE _geom.geom_ref = _denom.geom_ref)
FROM _denom WHERE _denom.geom_ref = numer.%I)), 0)
FROM observatory.%I numer
WHERE numer.%I = ANY ((SELECT ARRAY_AGG(geom_ref) FROM _geom)::TEXT[])',
        geom_colname, geom_colname, geom_geomref_colname,
        geom_tablename, geom_colname, geom_geomref_colname,
        denom_colname, denom_geomref_colname, denom_tablename,
        denom_geomref_colname, numer_colname, numer_geomref_colname,
        denom_colname, numer_geomref_colname,
        numer_tablename, numer_geomref_colname);
    ELSIF map_type = 'predenominated' THEN
      IF numer_aggregate NOT ILIKE 'sum' THEN
        -- Only names that exist in this function's scope may be used here:
        -- the measure's display name and id are not passed in, so report the
        -- numerator column and table instead.
        RAISE EXCEPTION 'Cannot calculate "%" (%) for custom area as it cannot be summed, use ST_PointOnSurface instead',
          numer_colname, numer_tablename;
      ELSE
        -- Overlap-weighted sum of the numerator.
        sql = format('WITH _subdivided AS (
SELECT ST_Subdivide($1) AS geom
), _geom AS (SELECT SUM(ST_Area(ST_Intersection(s.geom, geom.%I)))
/ ST_Area(cdb_observatory.FIRST(geom.%I)) overlap,
geom.%I geom_ref
FROM observatory.%I geom, _subdivided s
WHERE ST_Intersects(s.geom, geom.%I)
GROUP BY geom.%I
)
SELECT SUM(numer.%I * (SELECT _geom.overlap FROM _geom WHERE _geom.geom_ref = numer.%I))
FROM observatory.%I numer
WHERE numer.%I = ANY ((SELECT ARRAY_AGG(geom_ref) FROM _geom)::TEXT[])',
          geom_colname, geom_colname, geom_geomref_colname,
          geom_tablename, geom_colname, geom_geomref_colname,
          numer_colname, numer_geomref_colname, numer_tablename,
          numer_geomref_colname);
      END IF;
    END IF;
  END IF;

  -- No query is built when geom_type is neither 'point' nor 'polygon' (or is
  -- NULL). EXECUTE would reject the NULL string anyway; fail with a message
  -- that names the cause while keeping the same SQLSTATE.
  IF sql IS NULL THEN
    RAISE EXCEPTION 'Unsupported geom_type "%": expected ''point'' or ''polygon''', geom_type
      USING ERRCODE = 'null_value_not_allowed';
  END IF;

  EXECUTE sql INTO result USING geom;
  RETURN result;
END;
$$ LANGUAGE plpgsql IMMUTABLE;
--CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetMeasureData(
-- geom geometry(Geometry, 4326),
-- geom_type TEXT,
-- normalize TEXT,
-- numer_aggregate TEXT,
-- numer_colname TEXT,
-- numer_geomref_colname TEXT,
-- numer_tablename TEXT,
-- denom_colname TEXT,
-- denom_geomref_colname TEXT,
-- denom_tablename TEXT,
-- geom_colname TEXT,
-- geom_geomref_colname TEXT,
-- geom_tablename TEXT
--)
--RETURNS NUMERIC
--AS $$
--DECLARE
-- sql TEXT;
-- map_type TEXT;
-- result NUMERIC;
--BEGIN
--
-- IF normalize ILIKE 'area' AND numer_aggregate ILIKE 'sum' THEN
-- map_type := 'areaNormalized';
-- ELSIF normalize ILIKE 'denominator' THEN
-- map_type := 'denominated';
-- ELSE
-- -- defaults: area normalization for point if it's possible and none for
-- -- polygon or non-summable point
-- IF geom_type = 'point' AND numer_aggregate ILIKE 'sum' THEN
-- map_type := 'areaNormalized';
-- ELSE
-- map_type := 'predenominated';
-- END IF;
-- END IF;
--
-- IF geom_type = 'point' THEN
-- IF map_type = 'areaNormalized' THEN
-- sql = format('WITH _geom AS (SELECT ST_Area(geom.%I::Geography) / 1000000 area, geom.%I geom_ref
-- FROM observatory.%I geom
-- WHERE ST_Within($1, geom.%I)
-- LIMIT 1)
-- SELECT numer.%I / (SELECT area FROM _geom)
-- FROM observatory.%I numer
-- WHERE numer.%I = (SELECT geom_ref FROM _geom)',
-- geom_colname, geom_geomref_colname, geom_tablename,
-- geom_colname, numer_colname, numer_tablename,
-- numer_geomref_colname);
-- ELSIF map_type = 'denominated' THEN
-- sql = format('SELECT numer.%I / NULLIF((SELECT denom.%I FROM observatory.%I denom WHERE denom.%I = numer.%I LIMIT 1), 0)
-- FROM observatory.%I numer
-- WHERE numer.%I =
-- (SELECT geom.%I
-- FROM observatory.%I geom
-- WHERE ST_Within($1, geom.%I) LIMIT 1)',
-- numer_colname, denom_colname, denom_tablename,
-- denom_geomref_colname, numer_geomref_colname,
-- numer_tablename, numer_geomref_colname,
-- geom_geomref_colname, geom_tablename, geom_colname);
-- ELSIF map_type = 'predenominated' THEN
-- sql = format('SELECT numer.%I
-- FROM observatory.%I numer
-- WHERE numer.%I =
-- (SELECT geom.%I
-- FROM observatory.%I geom
-- WHERE ST_Within($1, geom.%I) LIMIT 1)',
-- numer_colname, numer_tablename, numer_geomref_colname,
-- geom_geomref_colname, geom_tablename, geom_colname);
-- END IF;
-- ELSIF geom_type = 'polygon' THEN
-- IF map_type = 'areaNormalized' THEN
-- sql = format('WITH _subdivided AS (
-- SELECT ST_Subdivide($1) AS geom
-- ), _geom AS (SELECT SUM(ST_Area(ST_Intersection(s.geom, geom.%I)))
-- / ST_Area(cdb_observatory.FIRST(geom.%I)) overlap, geom.%I geom_ref
-- FROM observatory.%I geom, _subdivided s
-- WHERE ST_Intersects(s.geom, geom.%I)
-- GROUP BY geom.%I)
-- SELECT SUM(numer.%I * (SELECT _geom.overlap FROM _geom WHERE _geom.geom_ref = numer.%I)) /
-- (ST_Area($1::Geography) / 1000000)
-- FROM observatory.%I numer
-- WHERE numer.%I = ANY ((SELECT ARRAY_AGG(geom_ref) FROM _geom)::TEXT[])',
-- geom_colname, geom_colname, geom_geomref_colname, geom_tablename,
-- geom_colname, geom_geomref_colname, numer_colname,
-- numer_geomref_colname, numer_tablename, numer_geomref_colname);
-- ELSIF map_type = 'denominated' THEN
-- sql = format('WITH _subdivided AS (
-- SELECT ST_Subdivide($1) AS geom
-- ), _geom AS (SELECT SUM(ST_Area(ST_Intersection(s.geom, geom.%I)))
-- / ST_Area(cdb_observatory.FIRST(geom.%I)) overlap, geom.%I geom_ref
-- FROM observatory.%I geom, _subdivided s
-- WHERE ST_Intersects(s.geom, geom.%I)
-- GROUP BY geom.%I),
-- _denom AS (SELECT denom.%I, denom.%I geom_ref
-- FROM observatory.%I denom
-- WHERE denom.%I = ANY ((SELECT ARRAY_AGG(geom_ref) FROM _geom)::TEXT[]))
-- SELECT SUM(numer.%I * (SELECT _geom.overlap FROM _geom WHERE _geom.geom_ref = numer.%I)) /
-- NullIf(SUM((SELECT _denom.%I * (SELECT _geom.overlap
-- FROM _geom
-- WHERE _geom.geom_ref = _denom.geom_ref)
-- FROM _denom WHERE _denom.geom_ref = numer.%I)), 0)
-- FROM observatory.%I numer
-- WHERE numer.%I = ANY ((SELECT ARRAY_AGG(geom_ref) FROM _geom)::TEXT[])',
-- geom_colname, geom_colname, geom_geomref_colname,
-- geom_tablename, geom_colname, geom_geomref_colname,
-- denom_colname, denom_geomref_colname, denom_tablename,
-- denom_geomref_colname, numer_colname, numer_geomref_colname,
-- denom_colname, numer_geomref_colname,
-- numer_tablename, numer_geomref_colname);
-- ELSIF map_type = 'predenominated' THEN
-- IF numer_aggregate NOT ILIKE 'sum' THEN
-- RAISE EXCEPTION 'Cannot calculate "%" (%) for custom area as it cannot be summed, use ST_PointOnSurface instead',
-- numer_name, measure_id;
-- ELSE
-- sql = format('WITH _subdivided AS (
-- SELECT ST_Subdivide($1) AS geom
-- ), _geom AS (SELECT SUM(ST_Area(ST_Intersection(s.geom, geom.%I)))
-- / ST_Area(cdb_observatory.FIRST(geom.%I)) overlap,
-- geom.%I geom_ref
-- FROM observatory.%I geom, _subdivided s
-- WHERE ST_Intersects(s.geom, geom.%I)
-- GROUP BY geom.%I
-- )
-- SELECT SUM(numer.%I * (SELECT _geom.overlap FROM _geom WHERE _geom.geom_ref = numer.%I))
-- FROM observatory.%I numer
-- WHERE numer.%I = ANY ((SELECT ARRAY_AGG(geom_ref) FROM _geom)::TEXT[])',
-- geom_colname, geom_colname, geom_geomref_colname,
-- geom_tablename, geom_colname, geom_geomref_colname,
-- numer_colname, numer_geomref_colname, numer_tablename,
-- numer_geomref_colname);
-- END IF;
-- END IF;
-- END IF;
--
-- EXECUTE SELECT cdb_observatory.OBS_GetMeasureDataMulti(
-- array[($1, 1)::geomval, $2
-- )
--
-- INTO result USING geom, params;
-- RETURN result;
--
--END;
--$$ LANGUAGE plpgsql IMMUTABLE;
CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetMeasure(
geom geometry(Geometry, 4326),
@ -591,6 +596,7 @@ DECLARE
denom_name TEXT;
geom_name TEXT;
denom_id TEXT;
map_type TEXT;
BEGIN
IF geom IS NULL THEN
RETURN NULL;
@ -611,32 +617,42 @@ BEGIN
ST_GeometryType(geom);
END IF;
IF normalize ILIKE 'area%' THEN --AND numer_aggregate ILIKE 'sum' THEN
map_type := 'areaNormalized';
ELSIF normalize ILIKE 'denom%' THEN
map_type := 'denominated';
ELSE
-- defaults: area normalization for point if it's possible and none for
-- polygon or non-summable point
IF geom_type = 'point' THEN --AND numer_aggregate ILIKE 'sum' THEN
map_type := 'areaNormalized';
ELSE
map_type := 'predenominated';
END IF;
END IF;
RAISE NOTICE 'map_type: %, geom_type: %', map_type, geom_type;
params := (SELECT cdb_observatory.OBS_GetMeasureMetaMulti(
geom, JSON_Build_Array(JSON_Build_Object('numer_id', measure_id,
'geom_id', boundary_id,
'timespan', time_span
)), 500))->>0;
'geom_id', boundary_id,
'timespan', time_span
)), 1, 1, 500));
IF params->>'geom_id' IS NULL THEN
--IF normalize IS NOT NULL THEN
params := JSON_Build_Array(JSONB_Set((params::JSONB)->0, '{normalization}', to_jsonb(map_type))::JSON);
--END IF;
RAISE NOTICE '%', params;
IF params->0->>'geom_id' IS NULL THEN
RAISE NOTICE 'No boundary found for geom';
RETURN NULL;
ELSE
RAISE NOTICE 'Using boundary %', params->>'geom_id';
RAISE NOTICE 'Using boundary %', params->0->>'geom_id';
END IF;
SELECT cdb_observatory.OBS_GetMeasureData(geom,
geom_type,
normalize,
params->>'numer_aggregate',
params->>'numer_colname',
params->>'numer_geomref_colname',
params->>'numer_tablename',
params->>'denom_colname',
params->>'denom_geomref_colname',
params->>'denom_tablename',
params->>'geom_colname',
params->>'geom_geomref_colname',
params->>'geom_tablename')
SELECT measure FROM
cdb_observatory.OBS_GetMeasureDataMulti(ARRAY[(geom, 1)::geomval], params)
AS (id INT, measure NUMERIC)
INTO result;
RETURN result;
@ -693,8 +709,9 @@ $$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetMeasureDataMulti(
--geoms Geometry(Geometry, 4326)[], params json)
geomvals geomval[], params JSON)
geomvals geomval[],
params JSON
)
RETURNS SETOF RECORD
AS $$
DECLARE
@ -730,11 +747,13 @@ BEGIN
(unnest($1))->>'geom_geomref_colname' geom_geomref_colname,
(unnest($1))->>'geom_tablename' geom_tablename,
(unnest($1))->>'geom_type' geom_type,
(unnest($1))->>'timespan' timespan
(unnest($1))->>'timespan' timespan,
(unnest($1))->>'normalization' normalization
)
SELECT String_Agg(CASE
-- denominated
WHEN denom_id IS NOT NULL THEN ' CASE ' ||
WHEN LOWER(normalization) LIKE 'denom%' OR (normalization IS NULL AND denom_id IS NOT NULL)
THEN ' CASE ' ||
-- denominated point-in-poly or user polygon is same as OBS polygon
' WHEN ST_GeometryType(cdb_observatory.FIRST(_geoms.geom)) = ''ST_Point'' ' ||
' OR cdb_observatory.FIRST(_geoms.geom = ' || geom_tablename || '.' || geom_colname || ')' ||
@ -744,7 +763,8 @@ BEGIN
-- SUM ((numer / denom) * (% user geom in OBS geom))
' ELSE ' ||
--' NULL END '
' SUM((' || numer_tablename || '.' || numer_colname || '/NullIf(' || denom_tablename || '.' || denom_colname || ', 0)) ' ||
' SUM((' || numer_tablename || '.' || numer_colname || '/' ||
' NullIf(' || denom_tablename || '.' || denom_colname || ', 0)) ' ||
' * CASE WHEN ST_Within(_geoms.geom, ' || geom_tablename || '.' || geom_colname || ') THEN 1 ' ||
' WHEN ST_Within(' || geom_tablename || '.' || geom_colname || ', _geoms.geom) THEN ' ||
' ST_Area(' || geom_tablename || '.' || geom_colname || ') ' ||
@ -753,7 +773,8 @@ BEGIN
' / ST_Area(_geoms.geom))' ||
' END) END '
-- areaNormalized
WHEN numer_aggregate ILIKE 'sum' THEN ' CASE ' ||
WHEN LOWER(normalization) LIKE 'area%' OR (normalization IS NULL AND numer_aggregate ILIKE 'sum')
THEN ' CASE ' ||
-- areaNormalized point-in-poly or user polygon is the same as OBS polygon
' WHEN ST_GeometryType(cdb_observatory.FIRST(_geoms.geom)) = ''ST_Point'' ' ||
' OR cdb_observatory.FIRST(_geoms.geom = ' || geom_tablename || '.' || geom_colname || ')' ||
@ -763,24 +784,6 @@ BEGIN
-- SUM (numer * (% OBS geom in user geom)) / area of big geom
' ELSE ' ||
--' NULL END '
' SUM(' || numer_tablename || '.' || numer_colname || ' ' ||
' * CASE WHEN ST_Within(_geoms.geom, ' || geom_tablename || '.' || geom_colname || ') ' ||
' THEN ST_Area(_geoms.geom) / ST_Area(' || geom_tablename || '.' || geom_colname || ') ' ||
' WHEN ST_Within(' || geom_tablename || '.' || geom_colname || ', _geoms.geom) ' ||
' THEN 1 ' ||
' ELSE (ST_Area(ST_Intersection(_geoms.geom, ' || geom_tablename || '.' || geom_colname || ')) ' ||
' / ST_Area(' || geom_tablename || '.' || geom_colname || '))' ||
' END) END '
-- prenormalized
ELSE ' CASE ' ||
-- predenominated point-in-poly or user polygon is the same as OBS- polygon
' WHEN ST_GeometryType(cdb_observatory.FIRST(_geoms.geom)) = ''ST_Point'' ' ||
' OR cdb_observatory.FIRST(_geoms.geom = ' || geom_tablename || '.' || geom_colname || ')' ||
' THEN cdb_observatory.FIRST(' || numer_tablename || '.' || numer_colname || ') ' ||
' ELSE ' ||
-- predenominated polygon interpolation
-- TODO should weight by universe instead of area
-- SUM (numer * (% user geom in OBS geom))
' SUM((' || numer_tablename || '.' || numer_colname || ') ' ||
' * CASE WHEN ST_Within(_geoms.geom, ' || geom_tablename || '.' || geom_colname || ') THEN 1 ' ||
' WHEN ST_Within(' || geom_tablename || '.' || geom_colname || ', _geoms.geom) THEN ' ||
@ -789,6 +792,24 @@ BEGIN
' ELSE (ST_Area(ST_Intersection(_geoms.geom, ' || geom_tablename || '.' || geom_colname || ')) ' ||
' / ST_Area(_geoms.geom))' ||
' END) END '
-- prenormalized
ELSE ' CASE ' ||
-- predenominated point-in-poly or user polygon is the same as OBS- polygon
' WHEN ST_GeometryType(cdb_observatory.FIRST(_geoms.geom)) = ''ST_Point'' ' ||
--' OR cdb_observatory.FIRST(_geoms.geom = ' || geom_tablename || '.' || geom_colname || ')' ||
' THEN cdb_observatory.FIRST(' || numer_tablename || '.' || numer_colname || ') ' ||
' ELSE ' ||
-- predenominated polygon interpolation
-- TODO should weight by universe instead of area
-- SUM (numer * (% user geom in OBS geom))
' SUM(' || numer_tablename || '.' || numer_colname || ' ' ||
' * CASE WHEN ST_Within(_geoms.geom, ' || geom_tablename || '.' || geom_colname || ') ' ||
' THEN ST_Area(_geoms.geom) / ST_Area(' || geom_tablename || '.' || geom_colname || ') ' ||
' WHEN ST_Within(' || geom_tablename || '.' || geom_colname || ', _geoms.geom) ' ||
' THEN 1 ' ||
' ELSE (ST_Area(ST_Intersection(_geoms.geom, ' || geom_tablename || '.' || geom_colname || ')) ' ||
' / ST_Area(' || geom_tablename || '.' || geom_colname || '))' ||
' END) END '
END || ':: ' || numer_type || ' AS ' || numer_colname, ', ') AS colspecs,
(SELECT String_Agg(tablename, ', ') FROM (SELECT JSONB_Object_Keys(JSONB_Object(

View File

@ -298,7 +298,7 @@ BEGIN
) SELECT available_geoms.*, score, numtiles, notnull_percent, numgeoms,
percentfill, estnumgeoms, meanmediansize
FROM available_geoms, scores
WHERE available_geoms.geom_id = scores.geom_id
WHERE available_geoms.geom_id = scores.column_id
$string$, geom_clause)
USING numer_id, denom_id, timespan, filter_tags, bounds;
RETURN;
@ -420,7 +420,8 @@ CREATE OR REPLACE FUNCTION cdb_observatory._OBS_GetGeometryScores(
) RETURNS TABLE (
score NUMERIC,
numtiles BIGINT,
geom_id TEXT,
table_id TEXT,
column_id TEXT,
notnull_percent NUMERIC,
numgeoms NUMERIC,
percentfill NUMERIC,
@ -454,20 +455,20 @@ BEGIN
FROM clipped_geom
GROUP BY column_id, table_id
), clipped_geom_reagg AS (
SELECT COUNT(*)::BIGINT cnt, a.column_id,
SELECT COUNT(*)::BIGINT cnt, a.column_id, a.table_id,
cdb_observatory.FIRST(pixels) first_pixel,
cdb_observatory.FIRST(notnull_pixels) first_notnull_pixel,
cdb_observatory.FIRST(tile) first_tile,
(ST_SummaryStatsAgg(clipped_tile, 1, True)).sum::Numeric sum_geoms, -- ND
(ST_SummaryStatsAgg(clipped_tile, 2, True)).mean::Numeric / 255 mean_fill --ND
--(ST_SummaryStatsAgg(clipped_tile, 2, True)).mean::Numeric / 255 mean_fill --ND
FROM clipped_geom_countagg a, clipped_geom b
WHERE a.table_id = b.table_id
AND a.column_id = b.column_id
GROUP BY a.column_id, a.table_id
), final AS (
SELECT
cnt, column_id
cnt, table_id, column_id
, (CASE WHEN first_notnull_pixel > 0
THEN first_notnull_pixel / first_pixel
ELSE 1
@ -480,7 +481,6 @@ BEGIN
* first_pixel) -- -20
END)::Numeric
AS numgeoms
, (CASE WHEN first_notnull_pixel > 0
THEN mean_fill
ELSE COALESCE(ST_Value(first_tile, 2, ST_PointOnSurface($1))::Numeric / 255, 0) -- -2

View File

@ -566,6 +566,8 @@ BEGIN
geom_ct.column_id = geom_c.id AND
geom_c.type ILIKE 'geometry' AND
geom_c.id = '%s'
ORDER BY timespan DESC
LIMIT 1
$string$, boundary_id, boundary_id);
RETURN;
-- AND geom_t.timespan = '%s' <-- put in requested year

View File

@ -15,16 +15,17 @@ DROP TABLE IF EXISTS observatory.obs_meta_timespan;
DROP TABLE IF EXISTS observatory.obs_column_table_tile;
DROP TABLE IF EXISTS observatory.obs_column_table_tile_simple;
DROP TABLE IF EXISTS observatory.obs_fcd4e4f5610f6764973ef8c0c215b2e80bec8963;
DROP TABLE IF EXISTS observatory.obs_c6fb99c47d61289fbb8e561ff7773799d3fcc308;
DROP TABLE IF EXISTS observatory.obs_6c1309a64d8f3e6986061f4d1ca7b57743e75e74;
DROP TABLE IF EXISTS observatory.obs_0310c639744a2014bb1af82709228f05b59e7d3d;
DROP TABLE IF EXISTS observatory.obs_7615e8622a68bfc5fe37c69c9880edfb40250103;
DROP TABLE IF EXISTS observatory.obs_d39f7fe5959891c8296490d83c22ded31c54af13;
DROP TABLE IF EXISTS observatory.obs_1babf5a26a1ecda5fb74963e88408f71d0364b81;
DROP TABLE IF EXISTS observatory.obs_b393b5b88c6adda634b2071a8005b03c551b609a;
DROP TABLE IF EXISTS observatory.obs_78fb6c1d6ff6505225175922c2c389ce48d7632c;
DROP TABLE IF EXISTS observatory.obs_65f29658e096ca1485bf683f65fdbc9f05ec3c5d;
DROP TABLE IF EXISTS observatory.obs_144e8b4f906885b2e057ac4842644a553ae49c6e;
DROP TABLE IF EXISTS observatory.obs_a01cd5d8ccaa6531cef715071e9307e6b1987ec3;
DROP TABLE IF EXISTS observatory.obs_fc050f0b8673cfe3c6aa1040f749eb40975691b7;
DROP TABLE IF EXISTS observatory.obs_1a098da56badf5f32e336002b0a81708c40d29cd;
DROP TABLE IF EXISTS observatory.obs_1ea93bbc109c87c676b3270789dacf7a1430db6c;
DROP TABLE IF EXISTS observatory.obs_b393b5b88c6adda634b2071a8005b03c551b609a;
DROP TABLE IF EXISTS observatory.obs_87a814e485deabe3b12545a537f693d16ca702c2;
DROP TABLE IF EXISTS observatory.obs_c4411eba732408d47d73281772dbf03d60645dec;
DROP TABLE IF EXISTS observatory.obs_1746e37b7cd28cb131971ea4187d42d71f09c5f3;
DROP TABLE IF EXISTS observatory.obs_a01cd5d8ccaa6531cef715071e9307e6b1987ec3;

File diff suppressed because one or more lines are too long

View File

@ -33,67 +33,6 @@ WITH result as (
) select expected = 0 as OBS_Get_median_income_at_null_island
from result;
-- OBS_GetPoints
-- obs_getpoints
-- --------------------
-- {4809.33511352425}
-- SELECT
-- (cdb_observatory._OBS_GetPoints(
-- cdb_observatory._TestPoint(),
-- 'obs_c6fb99c47d61289fbb8e561ff7773799d3fcc308'::text, -- block groups (see _obs_geomtable)
-- (Array['{"colname":"total_pop","tablename":"obs_1a098da56badf5f32e336002b0a81708c40d29cd","aggregate":"sum","name":"Total Population","type":"Numeric","description":"The total number of all people living in a given geographic area. This is a very useful catch-all denominator when calculating rates."}'::json])
-- ))[1]::text = '{"value":10923.093200390833950,"name":"Total Population","tablename":"obs_1a098da56badf5f32e336002b0a81708c40d29cd","aggregate":"sum","type":"Numeric","description":"The total number of all people living in a given geographic area. This is a very useful catch-all denominator when calculating rates."}'
-- as OBS_GetPoints_for_test_point;
-- Regression check for _OBS_GetPoints at the test point: the numeric value is
-- compared with a 0.1% relative tolerance, every other field of the returned
-- JSON object is compared exactly.
SELECT
    (abs((pt.measure ->> 'value')::numeric - 10923.093200390833950) / 10923.093200390833950) < 0.001
        AS OBS_GetPoints_for_test_point_value,
    (pt.measure ->> 'name') = 'Total Population'
        AS OBS_GetPoints_for_test_point_name,
    (pt.measure ->> 'tablename') = 'obs_1a098da56badf5f32e336002b0a81708c40d29cd'
        AS OBS_GetPoints_for_test_point_tablename,
    (pt.measure ->> 'aggregate') = 'sum'
        AS OBS_GetPoints_for_test_point_aggregate,
    (pt.measure ->> 'type') = 'Numeric'
        AS OBS_GetPoints_for_test_point_type,
    (pt.measure ->> 'description') = 'The total number of all people living in a given geographic area. This is a very useful catch-all denominator when calculating rates.'
        AS OBS_GetPoints_for_test_point_description
FROM (
    -- First element of the array returned for the single requested column.
    SELECT
        (cdb_observatory._OBS_GetPoints(
            cdb_observatory._TestPoint(),
            'obs_c6fb99c47d61289fbb8e561ff7773799d3fcc308'::text, -- block groups (see _obs_geomtable)
            (Array['{"colname":"total_pop","tablename":"obs_1a098da56badf5f32e336002b0a81708c40d29cd","aggregate":"sum","name":"Total Population","type":"Numeric","description":"The total number of all people living in a given geographic area. This is a very useful catch-all denominator when calculating rates."}'::json])
        ))[1] AS measure
) AS pt;
-- Point at (0, 0) ("null island"): the first element returned by
-- _OBS_GetPoints is expected to be NULL there.
SELECT
    pt.measure::text IS NULL AS OBS_GetPoints_for_null_island
FROM (
    SELECT
        (cdb_observatory._OBS_GetPoints(
            ST_SetSRID(ST_Point(0, 0), 4326),
            'obs_c6fb99c47d61289fbb8e561ff7773799d3fcc308'::text, -- see example in obs_geomtable
            (Array['{"colname":"total_pop","tablename":"obs_1a098da56badf5f32e336002b0a81708c40d29cd","aggregate":"sum","name":"Total Population","type":"Numeric","description":"The total number of all people living in a given geographic area. This is a very useful catch-all denominator when calculating rates."}'::json])
        ))[1] AS measure
) AS pt;
-- OBS_GetPolygons
-- Regression check for _OBS_GetPolygons over the test area.
-- Expected value: 12327.3133495107 (Total Population).
--
-- The numeric value is compared with a 0.1% relative tolerance, matching the
-- _OBS_GetPoints check, because comparing the serialized JSON text exactly
-- fails on insignificant floating-point differences. The remaining fields of
-- the JSON object are still compared exactly, and everything is folded into
-- the single boolean column the expected output relies on.
WITH polygon_result AS (
    SELECT
        (cdb_observatory._OBS_GetPolygons(
            cdb_observatory._TestArea(),
            'obs_c6fb99c47d61289fbb8e561ff7773799d3fcc308'::text, -- see example in obs_geomtable
            Array['{"colname":"total_pop","tablename":"obs_1a098da56badf5f32e336002b0a81708c40d29cd","aggregate":"sum","name":"Total Population","type":"Numeric","description":"The total number of all people living in a given geographic area. This is a very useful catch-all denominator when calculating rates."}'::json]
        ))[1] AS measure
)
SELECT
    (
        (abs((measure ->> 'value')::numeric - 12327.3133495107) / 12327.3133495107) < 0.001
        AND (measure ->> 'name') = 'Total Population'
        AND (measure ->> 'tablename') = 'obs_1a098da56badf5f32e336002b0a81708c40d29cd'
        AND (measure ->> 'aggregate') = 'sum'
        AND (measure ->> 'type') = 'Numeric'
        AND (measure ->> 'description') = 'The total number of all people living in a given geographic area. This is a very useful catch-all denominator when calculating rates.'
    ) AS OBS_GetPolygons_for_test_point
FROM polygon_result;
-- 500 m buffer around (0, 0) ("null island"): the 'value' field of the first
-- element returned by _OBS_GetPolygons is expected to be NULL there.
SELECT
    (poly.measure ->> 'value') IS NULL AS OBS_GetPolygons_for_null_island
FROM (
    SELECT
        (cdb_observatory._OBS_GetPolygons(
            ST_Buffer(ST_SetSRID(ST_Point(0, 0), 4326)::geography, 500)::geometry,
            'obs_c6fb99c47d61289fbb8e561ff7773799d3fcc308'::text, -- see example in obs_geomtable
            Array['{"colname":"total_pop","tablename":"obs_1a098da56badf5f32e336002b0a81708c40d29cd","aggregate":"sum","name":"Total Population","type":"Numeric","description":"The total number of all people living in a given geographic area. This is a very useful catch-all denominator when calculating rates."}'::json]
        ))[1] AS measure
) AS poly;
SELECT cdb_observatory.OBS_GetSegmentSnapshot(
cdb_observatory._TestPoint(),
'us.census.tiger.census_tract'
@ -196,7 +135,7 @@ SELECT (abs(cdb_observatory.OBS_GetMeasure(
-- Poly-based OBS_GetMeasure with denominator normalization
SELECT abs(cdb_observatory.OBS_GetMeasure(
cdb_observatory._TestArea(),
'us.census.acs.B01001002', 'denominator') - 0.49026340444793965457) / 0.49026340444793965457 < 0.001 As OBS_GetMeasure_total_male_poly_denominator;
'us.census.acs.B01001002', 'denominator', null, '2010 - 2014') - 0.49026340444793965457) / 0.49026340444793965457 < 0.001 As OBS_GetMeasure_total_male_poly_denominator;
-- Poly-based OBS_GetMeasure with one very bad geom
SELECT abs(cdb_observatory.OBS_GetMeasure(

View File

@ -350,7 +350,7 @@ AS _obs_getavailablegeometries_foobarbaz_denom_not_in_2010_2014;
-- _OBS_GetGeometryScores tests
--
SELECT ARRAY_AGG(geom_id ORDER BY score DESC) =
SELECT ARRAY_AGG(column_id ORDER BY score DESC) =
ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract',
'us.census.tiger.county', 'us.census.tiger.zcta5']
AS _obs_geometryscores_500m_buffer
@ -359,7 +359,7 @@ SELECT ARRAY_AGG(geom_id ORDER BY score DESC) =
ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract',
'us.census.tiger.county', 'us.census.tiger.zcta5']);
SELECT ARRAY_AGG(geom_id ORDER BY score DESC) =
SELECT ARRAY_AGG(column_id ORDER BY score DESC) =
ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract',
'us.census.tiger.county', 'us.census.tiger.zcta5']
AS _obs_geometryscores_5km_buffer
@ -368,7 +368,7 @@ SELECT ARRAY_AGG(geom_id ORDER BY score DESC) =
ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract',
'us.census.tiger.county', 'us.census.tiger.zcta5']);
SELECT ARRAY_AGG(geom_id ORDER BY score DESC) =
SELECT ARRAY_AGG(column_id ORDER BY score DESC) =
ARRAY['us.census.tiger.census_tract', 'us.census.tiger.block_group',
'us.census.tiger.zcta5', 'us.census.tiger.county']
AS _obs_geometryscores_50km_buffer
@ -377,7 +377,7 @@ SELECT ARRAY_AGG(geom_id ORDER BY score DESC) =
ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract',
'us.census.tiger.zcta5', 'us.census.tiger.county']);
SELECT ARRAY_AGG(geom_id ORDER BY score DESC) =
SELECT ARRAY_AGG(column_id ORDER BY score DESC) =
ARRAY[ 'us.census.tiger.zcta5', 'us.census.tiger.census_tract',
'us.census.tiger.county', 'us.census.tiger.block_group']
AS _obs_geometryscores_500km_buffer
@ -386,7 +386,7 @@ SELECT ARRAY_AGG(geom_id ORDER BY score DESC) =
ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract',
'us.census.tiger.zcta5', 'us.census.tiger.county']);
SELECT ARRAY_AGG(geom_id ORDER BY score DESC) =
SELECT ARRAY_AGG(column_id ORDER BY score DESC) =
ARRAY['us.census.tiger.county', 'us.census.tiger.zcta5',
'us.census.tiger.census_tract', 'us.census.tiger.block_group']
AS _obs_geometryscores_2500km_buffer
@ -395,7 +395,7 @@ SELECT ARRAY_AGG(geom_id ORDER BY score DESC) =
ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract',
'us.census.tiger.zcta5', 'us.census.tiger.county']);
SELECT JSON_Object_Agg(geom_id, numgeoms::int ORDER BY numgeoms DESC)::Text
SELECT JSON_Object_Agg(column_id, numgeoms::int ORDER BY numgeoms DESC)::Text
= '{ "us.census.tiger.block_group" : 9, "us.census.tiger.census_tract" : 3, "us.census.tiger.zcta5" : 0, "us.census.tiger.county" : 0 }'
AS _obs_geometryscores_numgeoms_500m_buffer
FROM cdb_observatory._OBS_GetGeometryScores(
@ -403,7 +403,7 @@ SELECT JSON_Object_Agg(geom_id, numgeoms::int ORDER BY numgeoms DESC)::Text
ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract',
'us.census.tiger.zcta5', 'us.census.tiger.county']);
SELECT JSON_Object_Agg(geom_id, numgeoms::int ORDER BY numgeoms DESC)::Text =
SELECT JSON_Object_Agg(column_id, numgeoms::int ORDER BY numgeoms DESC)::Text =
'{ "us.census.tiger.block_group" : 899, "us.census.tiger.census_tract" : 328, "us.census.tiger.zcta5" : 45, "us.census.tiger.county" : 1 }'
AS _obs_geometryscores_numgeoms_5km_buffer
FROM cdb_observatory._OBS_GetGeometryScores(
@ -411,7 +411,7 @@ SELECT JSON_Object_Agg(geom_id, numgeoms::int ORDER BY numgeoms DESC)::Text =
ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract',
'us.census.tiger.zcta5', 'us.census.tiger.county']);
SELECT JSON_Object_Agg(geom_id, numgeoms::int ORDER BY numgeoms DESC)::Text =
SELECT JSON_Object_Agg(column_id, numgeoms::int ORDER BY numgeoms DESC)::Text =
'{ "us.census.tiger.block_group" : 12112, "us.census.tiger.census_tract" : 3792, "us.census.tiger.zcta5" : 550, "us.census.tiger.county" : 14 }'
AS _obs_geometryscores_numgeoms_50km_buffer
FROM cdb_observatory._OBS_GetGeometryScores(
@ -419,7 +419,7 @@ SELECT JSON_Object_Agg(geom_id, numgeoms::int ORDER BY numgeoms DESC)::Text =
ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract',
'us.census.tiger.zcta5', 'us.census.tiger.county']);
SELECT JSON_Object_Agg(geom_id, numgeoms::int ORDER BY numgeoms DESC)::Text =
SELECT JSON_Object_Agg(column_id, numgeoms::int ORDER BY numgeoms DESC)::Text =
'{ "us.census.tiger.block_group" : 48420, "us.census.tiger.census_tract" : 15774, "us.census.tiger.zcta5" : 6533, "us.census.tiger.county" : 304 }'
AS _obs_geometryscores_numgeoms_500km_buffer
FROM cdb_observatory._OBS_GetGeometryScores(
@ -427,7 +427,7 @@ SELECT JSON_Object_Agg(geom_id, numgeoms::int ORDER BY numgeoms DESC)::Text =
ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract',
'us.census.tiger.zcta5', 'us.census.tiger.county']);
SELECT JSON_Object_Agg(geom_id, numgeoms::int ORDER BY numgeoms DESC)::Text =
SELECT JSON_Object_Agg(column_id, numgeoms::int ORDER BY numgeoms DESC)::Text =
'{ "us.census.tiger.block_group" : 165475, "us.census.tiger.census_tract" : 55128, "us.census.tiger.zcta5" : 26499, "us.census.tiger.county" : 2622 }'
AS _obs_geometryscores_numgeoms_2500km_buffer
FROM cdb_observatory._OBS_GetGeometryScores(
@ -435,7 +435,7 @@ SELECT JSON_Object_Agg(geom_id, numgeoms::int ORDER BY numgeoms DESC)::Text =
ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract',
'us.census.tiger.zcta5', 'us.census.tiger.county']);
SELECT ARRAY_AGG(geom_id ORDER BY score DESC) =
SELECT ARRAY_AGG(column_id ORDER BY score DESC) =
ARRAY['us.census.tiger.county', 'us.census.tiger.zcta5',
'us.census.tiger.census_tract', 'us.census.tiger.block_group']
AS _obs_geometryscores_500km_buffer_50_geoms
@ -444,7 +444,7 @@ SELECT ARRAY_AGG(geom_id ORDER BY score DESC) =
ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract',
'us.census.tiger.zcta5', 'us.census.tiger.county'], 50);
SELECT ARRAY_AGG(geom_id ORDER BY score DESC)
SELECT ARRAY_AGG(column_id ORDER BY score DESC)
= ARRAY['us.census.tiger.zcta5', 'us.census.tiger.census_tract',
'us.census.tiger.block_group', 'us.census.tiger.county']
AS _obs_geometryscores_500km_buffer_500_geoms
@ -453,7 +453,7 @@ SELECT ARRAY_AGG(geom_id ORDER BY score DESC)
ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract',
'us.census.tiger.zcta5', 'us.census.tiger.county'], 500);
SELECT ARRAY_AGG(geom_id ORDER BY score DESC) =
SELECT ARRAY_AGG(column_id ORDER BY score DESC) =
ARRAY['us.census.tiger.census_tract', 'us.census.tiger.block_group',
'us.census.tiger.zcta5', 'us.census.tiger.county']
AS _obs_geometryscores_500km_buffer_2500_geoms
@ -462,7 +462,7 @@ SELECT ARRAY_AGG(geom_id ORDER BY score DESC) =
ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract',
'us.census.tiger.zcta5', 'us.census.tiger.county'], 2500);
SELECT ARRAY_AGG(geom_id ORDER BY score DESC) =
SELECT ARRAY_AGG(column_id ORDER BY score DESC) =
ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract',
'us.census.tiger.zcta5', 'us.census.tiger.county']
AS _obs_geometryscores_500km_buffer_25000_geoms

View File

@ -73,7 +73,7 @@ SELECT cdb_observatory.OBS_GetBoundaryId(
SELECT cdb_observatory.OBS_GetBoundaryId(
cdb_observatory._TestPoint(),
'us.census.tiger.county',
'2014'
'2015'
) = '36047'::text As OBS_GetBoundaryId_cartodb_county_with_year;
-- should give back null since there is not a census tract at null island
@ -335,7 +335,7 @@ FROM (
SELECT
geoid_colname = 'geoid' As geoid_name_matches,
target_table = 'obs_fc050f0b8673cfe3c6aa1040f749eb40975691b7' As table_name_matches,
target_table = 'obs_87a814e485deabe3b12545a537f693d16ca702c2' As table_name_matches,
geom_colname = 'the_geom' As geom_name_matches
FROM cdb_observatory._OBS_GetGeometryMetadata('us.census.tiger.census_tract')
As m(geoid_colname, target_table, geom_colname);

View File

@ -23,11 +23,11 @@ for q in (
ST_Translate(the_geom, -0.1, 0.1) offset_geom,
geom_refs AS name
FROM (SELECT * FROM {schema}OBS_GetBoundariesByGeometry(
st_makeenvelope(-74.05437469482422,40.66319159533881,
-73.81885528564453,40.745696344339564, 4326),
st_makeenvelope(-74.1, 40.5,
-73.8, 40.9, 4326),
'us.census.tiger.census_tract_clipped')) foo
ORDER BY ST_NPoints(the_geom) ASC
LIMIT 500''',
LIMIT 1000''',
'DROP TABLE IF EXISTS obs_perftest_complex',
'''CREATE TABLE obs_perftest_complex (cartodb_id SERIAL PRIMARY KEY,
point GEOMETRY,
@ -101,159 +101,159 @@ def record(params, results):
}
json.dump(tests, fhandle)
@parameterized([
('simple', '_OBS_GetGeometryScores', 'NULL', 1),
('simple', '_OBS_GetGeometryScores', 'NULL', 500),
('simple', '_OBS_GetGeometryScores', 'NULL', 3000),
('complex', '_OBS_GetGeometryScores', 'NULL', 1),
('complex', '_OBS_GetGeometryScores', 'NULL', 500),
('complex', '_OBS_GetGeometryScores', 'NULL', 3000),
('country_simple', '_OBS_GetGeometryScores', 'NULL', 1),
('country_simple', '_OBS_GetGeometryScores', 'NULL', 500),
('country_simple', '_OBS_GetGeometryScores', 'NULL', 5000),
('country_complex', '_OBS_GetGeometryScores', 'NULL', 1),
('country_complex', '_OBS_GetGeometryScores', 'NULL', 500),
('country_complex', '_OBS_GetGeometryScores', 'NULL', 5000),
])
def test_getgeometryscores_performance(geom_complexity, api_method, filters, target_geoms):
print api_method, geom_complexity, filters, target_geoms
rownums = (1, 5, 10, ) if 'complex' in geom_complexity else (5, 25, 50,)
results = []
for rows in rownums:
stmt = '''SELECT {schema}{api_method}(geom, {filters}, {target_geoms})
FROM obs_perftest_{complexity}
WHERE cartodb_id <= {n}'''.format(
complexity=geom_complexity,
schema='cdb_observatory.' if USE_SCHEMA else '',
api_method=api_method,
filters=filters,
target_geoms=target_geoms,
n=rows)
start = time()
query(stmt)
end = time()
qps = (rows / (end - start))
results.append({
'rows': rows,
'qps': qps,
'stmt': stmt
})
print rows, ': ', qps, ' QPS'
if 'OBS_RECORD_TEST' in os.environ:
record({
'geom_complexity': geom_complexity,
'api_method': api_method,
'filters': filters,
'target_geoms': target_geoms
}, results)
@parameterized([
('simple', 'OBS_GetMeasureByID', None, 'us.census.tiger.census_tract', None),
('complex', 'OBS_GetMeasureByID', None, 'us.census.tiger.county', None),
('simple', 'OBS_GetMeasure', 'predenominated', 'point', 'NULL'),
('simple', 'OBS_GetMeasure', 'predenominated', 'geom', 'NULL'),
('simple', 'OBS_GetMeasure', 'predenominated', 'offset_geom', 'NULL'),
('simple', 'OBS_GetMeasure', 'area', 'point', 'NULL'),
('simple', 'OBS_GetMeasure', 'area', 'geom', 'NULL'),
('simple', 'OBS_GetMeasure', 'area', 'offset_geom', 'NULL'),
('simple', 'OBS_GetMeasure', 'denominator', 'point', 'NULL'),
('simple', 'OBS_GetMeasure', 'denominator', 'geom', 'NULL'),
('simple', 'OBS_GetMeasure', 'denominator', 'offset_geom', 'NULL'),
('simple', 'OBS_GetCategory', None, 'point', 'NULL'),
('simple', 'OBS_GetCategory', None, 'geom', 'NULL'),
('simple', 'OBS_GetCategory', None, 'offset_geom', 'NULL'),
('simple', 'OBS_GetMeasure', 'predenominated', 'point', "'us.census.tiger.census_tract'"),
('simple', 'OBS_GetMeasure', 'predenominated', 'geom', "'us.census.tiger.census_tract'"),
('simple', 'OBS_GetMeasure', 'predenominated', 'offset_geom', "'us.census.tiger.census_tract'"),
('simple', 'OBS_GetMeasure', 'area', 'point', "'us.census.tiger.census_tract'"),
('simple', 'OBS_GetMeasure', 'area', 'geom', "'us.census.tiger.census_tract'"),
('simple', 'OBS_GetMeasure', 'area', 'offset_geom', "'us.census.tiger.census_tract'"),
('simple', 'OBS_GetMeasure', 'denominator', 'point', "'us.census.tiger.census_tract'"),
('simple', 'OBS_GetMeasure', 'denominator', 'geom', "'us.census.tiger.census_tract'"),
('simple', 'OBS_GetMeasure', 'denominator', 'offset_geom', "'us.census.tiger.census_tract'"),
('simple', 'OBS_GetCategory', None, 'point', "'us.census.tiger.census_tract'"),
('simple', 'OBS_GetCategory', None, 'geom', "'us.census.tiger.census_tract'"),
('simple', 'OBS_GetCategory', None, 'offset_geom', "'us.census.tiger.census_tract'"),
('complex', 'OBS_GetMeasure', 'predenominated', 'point', 'NULL'),
('complex', 'OBS_GetMeasure', 'predenominated', 'geom', 'NULL'),
('complex', 'OBS_GetMeasure', 'predenominated', 'offset_geom', 'NULL'),
('complex', 'OBS_GetMeasure', 'area', 'point', 'NULL'),
('complex', 'OBS_GetMeasure', 'area', 'geom', 'NULL'),
('complex', 'OBS_GetMeasure', 'area', 'offset_geom', 'NULL'),
('complex', 'OBS_GetMeasure', 'denominator', 'point', 'NULL'),
('complex', 'OBS_GetMeasure', 'denominator', 'geom', 'NULL'),
('complex', 'OBS_GetMeasure', 'denominator', 'offset_geom', 'NULL'),
('complex', 'OBS_GetCategory', None, 'point', 'NULL'),
('complex', 'OBS_GetCategory', None, 'geom', 'NULL'),
('complex', 'OBS_GetCategory', None, 'offset_geom', 'NULL'),
('complex', 'OBS_GetMeasure', 'predenominated', 'point', "'us.census.tiger.county'"),
('complex', 'OBS_GetMeasure', 'predenominated', 'geom', "'us.census.tiger.county'"),
('complex', 'OBS_GetMeasure', 'predenominated', 'offset_geom', "'us.census.tiger.county'"),
('complex', 'OBS_GetMeasure', 'area', 'point', "'us.census.tiger.county'"),
('complex', 'OBS_GetMeasure', 'area', 'geom', "'us.census.tiger.county'"),
('complex', 'OBS_GetMeasure', 'area', 'offset_geom', "'us.census.tiger.county'"),
('complex', 'OBS_GetMeasure', 'denominator', 'point', "'us.census.tiger.county'"),
('complex', 'OBS_GetMeasure', 'denominator', 'geom', "'us.census.tiger.county'"),
('complex', 'OBS_GetMeasure', 'denominator', 'offset_geom', "'us.census.tiger.county'"),
('complex', 'OBS_GetCategory', None, 'point', "'us.census.tiger.census_tract'"),
('complex', 'OBS_GetCategory', None, 'geom', "'us.census.tiger.census_tract'"),
('complex', 'OBS_GetCategory', None, 'offset_geom', "'us.census.tiger.census_tract'"),
])
def test_getmeasure_performance(geom_complexity, api_method, normalization, geom, boundary):
print api_method, geom_complexity, normalization, geom, boundary
col = 'measure' if 'measure' in api_method.lower() else 'category'
results = []
rownums = (1, 5, 10, ) if geom_complexity == 'complex' else (5, 25, 50, )
for rows in rownums:
stmt = '''UPDATE obs_perftest_{complexity}
SET {col} = {schema}{api_method}({args})
WHERE cartodb_id <= {n}'''.format(
col=col,
complexity=geom_complexity,
schema='cdb_observatory.' if USE_SCHEMA else '',
api_method=api_method,
args=ARGS[api_method, normalization].format(geom, boundary),
n=rows)
start = time()
query(stmt)
end = time()
qps = (rows / (end - start))
results.append({
'rows': rows,
'qps': qps,
'stmt': stmt
})
print rows, ': ', qps, ' QPS'
if 'OBS_RECORD_TEST' in os.environ:
record({
'geom_complexity': geom_complexity,
'api_method': api_method,
'normalization': normalization,
'geom': geom
}, results)
#@parameterized([
# ('simple', '_OBS_GetGeometryScores', 'NULL', 1),
# ('simple', '_OBS_GetGeometryScores', 'NULL', 500),
# ('simple', '_OBS_GetGeometryScores', 'NULL', 3000),
#
# ('complex', '_OBS_GetGeometryScores', 'NULL', 1),
# ('complex', '_OBS_GetGeometryScores', 'NULL', 500),
# ('complex', '_OBS_GetGeometryScores', 'NULL', 3000),
#
# ('country_simple', '_OBS_GetGeometryScores', 'NULL', 1),
# ('country_simple', '_OBS_GetGeometryScores', 'NULL', 500),
# ('country_simple', '_OBS_GetGeometryScores', 'NULL', 5000),
#
# ('country_complex', '_OBS_GetGeometryScores', 'NULL', 1),
# ('country_complex', '_OBS_GetGeometryScores', 'NULL', 500),
# ('country_complex', '_OBS_GetGeometryScores', 'NULL', 5000),
#])
#def test_getgeometryscores_performance(geom_complexity, api_method, filters, target_geoms):
# print api_method, geom_complexity, filters, target_geoms
#
# rownums = (1, 5, 10, ) if 'complex' in geom_complexity else (5, 25, 50,)
# results = []
# for rows in rownums:
# stmt = '''SELECT {schema}{api_method}(geom, {filters}, {target_geoms})
# FROM obs_perftest_{complexity}
# WHERE cartodb_id <= {n}'''.format(
# complexity=geom_complexity,
# schema='cdb_observatory.' if USE_SCHEMA else '',
# api_method=api_method,
# filters=filters,
# target_geoms=target_geoms,
# n=rows)
# start = time()
# query(stmt)
# end = time()
# qps = (rows / (end - start))
# results.append({
# 'rows': rows,
# 'qps': qps,
# 'stmt': stmt
# })
# print rows, ': ', qps, ' QPS'
#
# if 'OBS_RECORD_TEST' in os.environ:
# record({
# 'geom_complexity': geom_complexity,
# 'api_method': api_method,
# 'filters': filters,
# 'target_geoms': target_geoms
# }, results)
#
#@parameterized([
# ('simple', 'OBS_GetMeasureByID', None, 'us.census.tiger.census_tract', None),
# ('complex', 'OBS_GetMeasureByID', None, 'us.census.tiger.county', None),
#
# ('simple', 'OBS_GetMeasure', 'predenominated', 'point', 'NULL'),
# ('simple', 'OBS_GetMeasure', 'predenominated', 'geom', 'NULL'),
# ('simple', 'OBS_GetMeasure', 'predenominated', 'offset_geom', 'NULL'),
# ('simple', 'OBS_GetMeasure', 'area', 'point', 'NULL'),
# ('simple', 'OBS_GetMeasure', 'area', 'geom', 'NULL'),
# ('simple', 'OBS_GetMeasure', 'area', 'offset_geom', 'NULL'),
# ('simple', 'OBS_GetMeasure', 'denominator', 'point', 'NULL'),
# ('simple', 'OBS_GetMeasure', 'denominator', 'geom', 'NULL'),
# ('simple', 'OBS_GetMeasure', 'denominator', 'offset_geom', 'NULL'),
# ('simple', 'OBS_GetCategory', None, 'point', 'NULL'),
# ('simple', 'OBS_GetCategory', None, 'geom', 'NULL'),
# ('simple', 'OBS_GetCategory', None, 'offset_geom', 'NULL'),
#
# ('simple', 'OBS_GetMeasure', 'predenominated', 'point', "'us.census.tiger.census_tract'"),
# ('simple', 'OBS_GetMeasure', 'predenominated', 'geom', "'us.census.tiger.census_tract'"),
# ('simple', 'OBS_GetMeasure', 'predenominated', 'offset_geom', "'us.census.tiger.census_tract'"),
# ('simple', 'OBS_GetMeasure', 'area', 'point', "'us.census.tiger.census_tract'"),
# ('simple', 'OBS_GetMeasure', 'area', 'geom', "'us.census.tiger.census_tract'"),
# ('simple', 'OBS_GetMeasure', 'area', 'offset_geom', "'us.census.tiger.census_tract'"),
# ('simple', 'OBS_GetMeasure', 'denominator', 'point', "'us.census.tiger.census_tract'"),
# ('simple', 'OBS_GetMeasure', 'denominator', 'geom', "'us.census.tiger.census_tract'"),
# ('simple', 'OBS_GetMeasure', 'denominator', 'offset_geom', "'us.census.tiger.census_tract'"),
# ('simple', 'OBS_GetCategory', None, 'point', "'us.census.tiger.census_tract'"),
# ('simple', 'OBS_GetCategory', None, 'geom', "'us.census.tiger.census_tract'"),
# ('simple', 'OBS_GetCategory', None, 'offset_geom', "'us.census.tiger.census_tract'"),
#
# ('complex', 'OBS_GetMeasure', 'predenominated', 'point', 'NULL'),
# ('complex', 'OBS_GetMeasure', 'predenominated', 'geom', 'NULL'),
# ('complex', 'OBS_GetMeasure', 'predenominated', 'offset_geom', 'NULL'),
# ('complex', 'OBS_GetMeasure', 'area', 'point', 'NULL'),
# ('complex', 'OBS_GetMeasure', 'area', 'geom', 'NULL'),
# ('complex', 'OBS_GetMeasure', 'area', 'offset_geom', 'NULL'),
# ('complex', 'OBS_GetMeasure', 'denominator', 'point', 'NULL'),
# ('complex', 'OBS_GetMeasure', 'denominator', 'geom', 'NULL'),
# ('complex', 'OBS_GetMeasure', 'denominator', 'offset_geom', 'NULL'),
# ('complex', 'OBS_GetCategory', None, 'point', 'NULL'),
# ('complex', 'OBS_GetCategory', None, 'geom', 'NULL'),
# ('complex', 'OBS_GetCategory', None, 'offset_geom', 'NULL'),
#
# ('complex', 'OBS_GetMeasure', 'predenominated', 'point', "'us.census.tiger.county'"),
# ('complex', 'OBS_GetMeasure', 'predenominated', 'geom', "'us.census.tiger.county'"),
# ('complex', 'OBS_GetMeasure', 'predenominated', 'offset_geom', "'us.census.tiger.county'"),
# ('complex', 'OBS_GetMeasure', 'area', 'point', "'us.census.tiger.county'"),
# ('complex', 'OBS_GetMeasure', 'area', 'geom', "'us.census.tiger.county'"),
# ('complex', 'OBS_GetMeasure', 'area', 'offset_geom', "'us.census.tiger.county'"),
# ('complex', 'OBS_GetMeasure', 'denominator', 'point', "'us.census.tiger.county'"),
# ('complex', 'OBS_GetMeasure', 'denominator', 'geom', "'us.census.tiger.county'"),
# ('complex', 'OBS_GetMeasure', 'denominator', 'offset_geom', "'us.census.tiger.county'"),
# ('complex', 'OBS_GetCategory', None, 'point', "'us.census.tiger.census_tract'"),
# ('complex', 'OBS_GetCategory', None, 'geom', "'us.census.tiger.census_tract'"),
# ('complex', 'OBS_GetCategory', None, 'offset_geom', "'us.census.tiger.census_tract'"),
#])
#def test_getmeasure_performance(geom_complexity, api_method, normalization, geom, boundary):
# print api_method, geom_complexity, normalization, geom, boundary
# col = 'measure' if 'measure' in api_method.lower() else 'category'
# results = []
#
# rownums = (1, 5, 10, ) if geom_complexity == 'complex' else (5, 25, 50, )
# for rows in rownums:
# stmt = '''UPDATE obs_perftest_{complexity}
# SET {col} = {schema}{api_method}({args})
# WHERE cartodb_id <= {n}'''.format(
# col=col,
# complexity=geom_complexity,
# schema='cdb_observatory.' if USE_SCHEMA else '',
# api_method=api_method,
# args=ARGS[api_method, normalization].format(geom, boundary),
# n=rows)
# start = time()
# query(stmt)
# end = time()
# qps = (rows / (end - start))
# results.append({
# 'rows': rows,
# 'qps': qps,
# 'stmt': stmt
# })
# print rows, ': ', qps, ' QPS'
#
# if 'OBS_RECORD_TEST' in os.environ:
# record({
# 'geom_complexity': geom_complexity,
# 'api_method': api_method,
# 'normalization': normalization,
# 'geom': geom
# }, results)
@parameterized([
('simple', 'predenominated', 'point', 'NULL'),
('simple', 'predenominated', 'geom', 'NULL'),
('simple', 'predenominated', 'offset_geom', 'NULL'),
('simple', 'area', 'point', 'NULL'),
('simple', 'area', 'geom', 'NULL'),
('simple', 'area', 'offset_geom', 'NULL'),
('simple', 'denominator', 'point', 'NULL'),
('simple', 'denominator', 'geom', 'NULL'),
('simple', 'denominator', 'offset_geom', 'NULL'),
('simple', 'predenominated', 'point', 'null'),
('simple', 'predenominated', 'geom', 'null'),
('simple', 'predenominated', 'offset_geom', 'null'),
('simple', 'area', 'point', 'null'),
('simple', 'area', 'geom', 'null'),
('simple', 'area', 'offset_geom', 'null'),
('simple', 'denominator', 'point', 'null'),
('simple', 'denominator', 'geom', 'null'),
('simple', 'denominator', 'offset_geom', 'null'),
('simple', 'predenominated', 'point', "'us.census.tiger.census_tract'"),
('simple', 'predenominated', 'geom', "'us.census.tiger.census_tract'"),
@ -289,17 +289,23 @@ def test_getmeasure_split_performance(geom_complexity, normalization, geom, boun
print geom_complexity, normalization, geom, boundary
results = []
rownums = (1, 5, 10, ) if geom_complexity == 'complex' else (5, 25, 50, )
rownums = (1, 5, 10, ) if geom_complexity == 'complex' else (100, 500, 1000)
for rows in rownums:
stmt = '''
with data as (
SELECT * FROM {schema}{api_method}datamulti(
(SELECT array_agg((geom, cartodb_id)::geomval)
(SELECT array_agg(({geom}, cartodb_id)::geomval)
FROM obs_perftest_{complexity}
WHERE cartodb_id <= {n}),
(SELECT {schema}{api_method}metamulti(
(SELECT st_setsrid(st_extent(geom),4326) from obs_perftest_{complexity}),
json_build_array(json_build_object('numer_id', 'us.census.acs.B01001002'))
(SELECT st_setsrid(st_extent({geom}), 4326)
FROM obs_perftest_{complexity}
WHERE cartodb_id <= {n}),
'[{{
"numer_id": "us.census.acs.B01001002",
"normalization": "{normalization}",
"geom_id": {boundary}
}}]'::JSON
))
)
AS x(cartodb_id INTEGER, measure Numeric))
@ -315,7 +321,7 @@ WHERE obs_perftest_{complexity}.cartodb_id = data.cartodb_id
api_method='obs_getmeasure',
normalization=normalization,
geom=geom,
boundary=boundary,
boundary=boundary.replace("'", '"'),
n=rows)
start = time()
query(stmt)