Merge pull request #236 from CartoDB/develop

Release 1.1.6
This commit is contained in:
Mario de Frutos 2016-12-12 16:24:31 +01:00 committed by GitHub
commit f378e75d4c
6 changed files with 256 additions and 149 deletions

26
NEWS.md
View File

@ -1,3 +1,29 @@
1.1.6 (2016-12-08)
__Bugfixes__
* Fix divide by zero condition in "denominator" branch of `OBS_GetMeasure`
when passing in a polygon ([#233](https://github.com/CartoDB/observatory-extension/pull/233)).
__Improvements__
* Use `ST_Subdivide` to improve performance when functions are called on very
complex geometries (with many points) ([#232](https://github.com/CartoDB/observatory-extension/pull/232))
* Improve raster scoring to more heavily weight boundaries with nearer to
correct number of points, and penalize boundaries with lots of blank space
([#232](https://github.com/CartoDB/observatory-extension/pull/232))
* Remove some redundant area calculations in `OBS_GetMeasure`
([#232](https://github.com/CartoDB/observatory-extension/pull/232))
* Replace use of `format('%L', var)` with proper use of `EXECUTE` and `$1` etc.
variables ([#231](https://github.com/CartoDB/observatory-extension/pull/231))
* Add test point for Brazil
([#229](https://github.com/CartoDB/observatory-extension/pull/229))
* Improvements to performance tests
([#229](https://github.com/CartoDB/observatory-extension/pull/229))
- Support simple and complex geometries
- Handle all code branches
- Add ability to persist results to JSON for graph visualization later
1.1.5 (2016-11-29) 1.1.5 (2016-11-29)
__Bugfixes__ __Bugfixes__

View File

@ -232,27 +232,25 @@ BEGIN
-- we *really* should pass in both geom_table_name and boundary_id -- we *really* should pass in both geom_table_name and boundary_id
-- TODO tablename should not be passed here (use boundary_id) -- TODO tablename should not be passed here (use boundary_id)
EXECUTE EXECUTE
format('SELECT ct.colname 'SELECT ct.colname
FROM observatory.obs_column_to_column c2c, FROM observatory.obs_column_to_column c2c,
observatory.obs_column_table ct, observatory.obs_column_table ct,
observatory.obs_table t observatory.obs_table t
WHERE c2c.reltype = ''geom_ref'' WHERE c2c.reltype = ''geom_ref''
AND ct.column_id = c2c.source_id AND ct.column_id = c2c.source_id
AND ct.table_id = t.id AND ct.table_id = t.id
AND t.tablename = %L' AND t.tablename = $1'
, (data_table_info)[1]->>'tablename') INTO data_geoid_colname USING (data_table_info)[1]->>'tablename';
INTO data_geoid_colname;
EXECUTE EXECUTE
format('SELECT ct.colname 'SELECT ct.colname
FROM observatory.obs_column_to_column c2c, FROM observatory.obs_column_to_column c2c,
observatory.obs_column_table ct, observatory.obs_column_table ct,
observatory.obs_table t observatory.obs_table t
WHERE c2c.reltype = ''geom_ref'' WHERE c2c.reltype = ''geom_ref''
AND ct.column_id = c2c.source_id AND ct.column_id = c2c.source_id
AND ct.table_id = t.id AND ct.table_id = t.id
AND t.tablename = %L' AND t.tablename = $1'
, geom_table_name) INTO geom_geoid_colname USING geom_table_name;
INTO geom_geoid_colname;
EXECUTE EXECUTE
format('SELECT %I format('SELECT %I
@ -268,11 +266,10 @@ BEGIN
EXECUTE EXECUTE
format('SELECT ST_Area(the_geom::geography) / (1000 * 1000) format('SELECT ST_Area(the_geom::geography) / (1000 * 1000)
FROM observatory.%I FROM observatory.%I
WHERE %I = %L', WHERE %I = $1',
geom_table_name, geom_table_name,
geom_geoid_colname, geom_geoid_colname)
geoid) INTO area USING geoid;
INTO area;
IF area IS NULL IF area IS NULL
THEN THEN
@ -343,7 +340,8 @@ CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetMeasure(
measure_id TEXT, measure_id TEXT,
normalize TEXT DEFAULT NULL, normalize TEXT DEFAULT NULL,
boundary_id TEXT DEFAULT NULL, boundary_id TEXT DEFAULT NULL,
time_span TEXT DEFAULT NULL time_span TEXT DEFAULT NULL,
simplification NUMERIC DEFAULT 0.00001
) )
RETURNS NUMERIC RETURNS NUMERIC
AS $$ AS $$
@ -369,13 +367,16 @@ BEGIN
RETURN NULL; RETURN NULL;
END IF; END IF;
geom := ST_SnapToGrid(geom, 0.000001); IF simplification IS NOT NULL THEN
geom := ST_Simplify(geom, simplification);
END IF;
IF ST_GeometryType(geom) = 'ST_Point' THEN IF ST_GeometryType(geom) = 'ST_Point' THEN
geom_type := 'point'; geom_type := 'point';
ELSIF ST_GeometryType(geom) IN ('ST_Polygon', 'ST_MultiPolygon') THEN ELSIF ST_GeometryType(geom) IN ('ST_Polygon', 'ST_MultiPolygon') THEN
geom_type := 'polygon'; geom_type := 'polygon';
geom := ST_Buffer(geom, 0.000001); --geom := ST_Buffer(geom, 0.000001);
geom := ST_CollectionExtract(ST_MakeValid(geom), 3);
ELSE ELSE
RAISE EXCEPTION 'Invalid geometry type (%), can only handle ''ST_Point'', ''ST_Polygon'', and ''ST_MultiPolygon''', RAISE EXCEPTION 'Invalid geometry type (%), can only handle ''ST_Point'', ''ST_Polygon'', and ''ST_MultiPolygon''',
ST_GeometryType(geom); ST_GeometryType(geom);
@ -435,103 +436,101 @@ BEGIN
IF map_type = 'areaNormalized' THEN IF map_type = 'areaNormalized' THEN
sql = format('WITH _geom AS (SELECT ST_Area(geom.%I::Geography) / 1000000 area, geom.%I geom_ref sql = format('WITH _geom AS (SELECT ST_Area(geom.%I::Geography) / 1000000 area, geom.%I geom_ref
FROM observatory.%I geom FROM observatory.%I geom
WHERE ST_Within(%L, geom.%I) WHERE ST_Within($1, geom.%I)
LIMIT 1) LIMIT 1)
SELECT numer.%I / (SELECT area FROM _geom) SELECT numer.%I / (SELECT area FROM _geom)
FROM observatory.%I numer FROM observatory.%I numer
WHERE numer.%I = (SELECT geom_ref FROM _geom)', WHERE numer.%I = (SELECT geom_ref FROM _geom)',
geom_colname, geom_geomref_colname, geom_tablename, geom_colname, geom_geomref_colname, geom_tablename,
geom, geom_colname, numer_colname, numer_tablename, geom_colname, numer_colname, numer_tablename,
numer_geomref_colname); numer_geomref_colname);
ELSIF map_type = 'denominated' THEN ELSIF map_type = 'denominated' THEN
sql = format('SELECT numer.%I / NULLIF((SELECT denom.%I FROM observatory.%I denom WHERE denom.%I = numer.%I LIMIT 1), 0) sql = format('SELECT numer.%I / NULLIF((SELECT denom.%I FROM observatory.%I denom WHERE denom.%I = numer.%I LIMIT 1), 0)
FROM observatory.%I numer FROM observatory.%I numer
WHERE numer.%I = (SELECT geom.%I FROM observatory.%I geom WHERE ST_Within(%L, geom.%I) LIMIT 1)', WHERE numer.%I =
(SELECT geom.%I
FROM observatory.%I geom
WHERE ST_Within($1, geom.%I) LIMIT 1)',
numer_colname, denom_colname, denom_tablename, numer_colname, denom_colname, denom_tablename,
denom_geomref_colname, numer_geomref_colname, denom_geomref_colname, numer_geomref_colname,
numer_tablename, numer_tablename, numer_geomref_colname,
numer_geomref_colname, geom_geomref_colname, geom_geomref_colname, geom_tablename, geom_colname);
geom_tablename, geom, geom_colname);
ELSIF map_type = 'predenominated' THEN ELSIF map_type = 'predenominated' THEN
sql = format('SELECT numer.%I sql = format('SELECT numer.%I
FROM observatory.%I numer FROM observatory.%I numer
WHERE numer.%I = (SELECT geom.%I FROM observatory.%I geom WHERE ST_Within(%L, geom.%I) LIMIT 1)', WHERE numer.%I =
numer_colname, numer_tablename, (SELECT geom.%I
numer_geomref_colname, geom_geomref_colname, geom_tablename, FROM observatory.%I geom
geom, geom_colname); WHERE ST_Within($1, geom.%I) LIMIT 1)',
numer_colname, numer_tablename, numer_geomref_colname,
geom_geomref_colname, geom_tablename, geom_colname);
END IF; END IF;
ELSIF geom_type = 'polygon' THEN ELSIF geom_type = 'polygon' THEN
IF map_type = 'areaNormalized' THEN IF map_type = 'areaNormalized' THEN
sql = format('WITH _geom AS (SELECT ST_Area(ST_Intersection(%L, geom.%I)) sql = format('WITH _subdivided AS (
/ ST_Area(geom.%I) overlap, geom.%I geom_ref SELECT ST_Subdivide($1) AS geom
FROM observatory.%I geom ), _geom AS (SELECT SUM(ST_Area(ST_Intersection(s.geom, geom.%I)))
WHERE ST_Intersects(%L, geom.%I) / ST_Area(cdb_observatory.FIRST(geom.%I)) overlap, geom.%I geom_ref
AND ST_Area(ST_Intersection(%L, geom.%I)) / ST_Area(geom.%I) > 0) FROM observatory.%I geom, _subdivided s
WHERE ST_Intersects(s.geom, geom.%I)
GROUP BY geom.%I)
SELECT SUM(numer.%I * (SELECT _geom.overlap FROM _geom WHERE _geom.geom_ref = numer.%I)) / SELECT SUM(numer.%I * (SELECT _geom.overlap FROM _geom WHERE _geom.geom_ref = numer.%I)) /
(ST_Area(%L::Geography) / 1000000) (ST_Area($1::Geography) / 1000000)
FROM observatory.%I numer FROM observatory.%I numer
WHERE numer.%I = ANY ((SELECT ARRAY_AGG(geom_ref) FROM _geom)::TEXT[])', WHERE numer.%I = ANY ((SELECT ARRAY_AGG(geom_ref) FROM _geom)::TEXT[])',
geom, geom_colname, geom_colname, geom_colname, geom_colname, geom_geomref_colname, geom_tablename,
geom_geomref_colname, geom_tablename, geom_colname, geom_geomref_colname, numer_colname,
geom, geom_colname, numer_geomref_colname, numer_tablename, numer_geomref_colname);
geom, geom_colname, geom_colname,
numer_colname, numer_geomref_colname,
geom, numer_tablename,
numer_geomref_colname);
ELSIF map_type = 'denominated' THEN ELSIF map_type = 'denominated' THEN
sql = format('WITH _geom AS (SELECT ST_Area(ST_Intersection(%L, geom.%I)) sql = format('WITH _subdivided AS (
/ ST_Area(geom.%I) overlap, geom.%I geom_ref SELECT ST_Subdivide($1) AS geom
FROM observatory.%I geom ), _geom AS (SELECT SUM(ST_Area(ST_Intersection(s.geom, geom.%I)))
WHERE ST_Intersects(%L, geom.%I) / ST_Area(cdb_observatory.FIRST(geom.%I)) overlap, geom.%I geom_ref
AND ST_Area(ST_Intersection(%L, geom.%I)) / ST_Area(geom.%I) > 0), FROM observatory.%I geom, _subdivided s
WHERE ST_Intersects(s.geom, geom.%I)
GROUP BY geom.%I),
_denom AS (SELECT denom.%I, denom.%I geom_ref _denom AS (SELECT denom.%I, denom.%I geom_ref
FROM observatory.%I denom FROM observatory.%I denom
WHERE denom.%I = ANY ((SELECT ARRAY_AGG(geom_ref) FROM _geom)::TEXT[])) WHERE denom.%I = ANY ((SELECT ARRAY_AGG(geom_ref) FROM _geom)::TEXT[]))
SELECT SUM(numer.%I * (SELECT _geom.overlap FROM _geom WHERE _geom.geom_ref = numer.%I)) / SELECT SUM(numer.%I * (SELECT _geom.overlap FROM _geom WHERE _geom.geom_ref = numer.%I)) /
SUM((SELECT _denom.%I * (SELECT _geom.overlap NullIf(SUM((SELECT _denom.%I * (SELECT _geom.overlap
FROM _geom FROM _geom
WHERE _geom.geom_ref = _denom.geom_ref) WHERE _geom.geom_ref = _denom.geom_ref)
FROM _denom WHERE _denom.geom_ref = numer.%I)) FROM _denom WHERE _denom.geom_ref = numer.%I)), 0)
FROM observatory.%I numer FROM observatory.%I numer
WHERE numer.%I = ANY ((SELECT ARRAY_AGG(geom_ref) FROM _geom)::TEXT[])', WHERE numer.%I = ANY ((SELECT ARRAY_AGG(geom_ref) FROM _geom)::TEXT[])',
geom, geom_colname, geom_colname, geom_colname, geom_geomref_colname,
geom_colname, geom_geomref_colname, geom_tablename, geom_colname, geom_geomref_colname,
geom_tablename, denom_colname, denom_geomref_colname, denom_tablename,
geom, geom_colname, denom_geomref_colname, numer_colname, numer_geomref_colname,
geom, geom_colname, geom_colname, denom_colname, numer_geomref_colname,
denom_colname, denom_geomref_colname, numer_tablename, numer_geomref_colname);
denom_tablename,
denom_geomref_colname,
numer_colname, numer_geomref_colname,
denom_colname,
numer_geomref_colname,
numer_tablename,
numer_geomref_colname);
ELSIF map_type = 'predenominated' THEN ELSIF map_type = 'predenominated' THEN
IF numer_aggregate NOT ILIKE 'sum' THEN IF numer_aggregate NOT ILIKE 'sum' THEN
RAISE EXCEPTION 'Cannot calculate "%" (%) for custom area as it cannot be summed, use ST_PointOnSurface instead', RAISE EXCEPTION 'Cannot calculate "%" (%) for custom area as it cannot be summed, use ST_PointOnSurface instead',
numer_name, measure_id; numer_name, measure_id;
ELSE ELSE
sql = format('WITH _geom AS (SELECT ST_Area(ST_Intersection(%L, geom.%I)) sql = format('WITH _subdivided AS (
/ ST_Area(geom.%I) overlap, geom.%I geom_ref SELECT ST_Subdivide($1) AS geom
FROM observatory.%I geom ), _geom AS (SELECT SUM(ST_Area(ST_Intersection(s.geom, geom.%I)))
WHERE ST_Intersects(%L, geom.%I) / ST_Area(cdb_observatory.FIRST(geom.%I)) overlap,
AND ST_Area(ST_Intersection(%L, geom.%I)) / ST_Area(geom.%I) > 0) geom.%I geom_ref
FROM observatory.%I geom, _subdivided s
WHERE ST_Intersects(s.geom, geom.%I)
GROUP BY geom.%I
)
SELECT SUM(numer.%I * (SELECT _geom.overlap FROM _geom WHERE _geom.geom_ref = numer.%I)) SELECT SUM(numer.%I * (SELECT _geom.overlap FROM _geom WHERE _geom.geom_ref = numer.%I))
FROM observatory.%I numer FROM observatory.%I numer
WHERE numer.%I = ANY ((SELECT ARRAY_AGG(geom_ref) FROM _geom)::TEXT[])', WHERE numer.%I = ANY ((SELECT ARRAY_AGG(geom_ref) FROM _geom)::TEXT[])',
geom, geom_colname, geom_colname, geom_colname, geom_colname, geom_geomref_colname,
geom_geomref_colname, geom_tablename, geom_tablename, geom_colname, geom_geomref_colname,
geom, geom_colname, numer_colname, numer_geomref_colname, numer_tablename,
geom, geom_colname, geom_colname,
numer_colname, numer_geomref_colname,
numer_tablename,
numer_geomref_colname); numer_geomref_colname);
END IF; END IF;
END IF; END IF;
END IF; END IF;
EXECUTE sql INTO result; EXECUTE sql INTO result USING geom;
RETURN result; RETURN result;
END; END;
@ -574,11 +573,11 @@ BEGIN
EXECUTE format( EXECUTE format(
'SELECT %I 'SELECT %I
FROM observatory.%I data FROM observatory.%I data
WHERE data.%I = %L', WHERE data.%I = $1',
colname, colname,
target_table, target_table,
data_geoid_colname, geom_ref) data_geoid_colname)
INTO measure_val; INTO measure_val USING geom_ref;
RETURN measure_val; RETURN measure_val;
@ -627,28 +626,27 @@ BEGIN
'SELECT data.%I 'SELECT data.%I
FROM observatory.%I data, observatory.%I geom FROM observatory.%I data, observatory.%I geom
WHERE data.%I = geom.%I WHERE data.%I = geom.%I
AND ST_WITHIN(%L, geom.%I) ', AND ST_WITHIN($1, geom.%I) ',
colname, data_table, geom_table, data_geomref_colname, colname, data_table, geom_table, data_geomref_colname,
geom_geomref_colname, geom, geom_colname) geom_geomref_colname, geom_colname)
INTO category_val; INTO category_val USING geom;
ELSE ELSE
-- favor the category with the most area -- favor the category with the most area
EXECUTE format( EXECUTE format(
'SELECT data.%I category, SUM(overlap_fraction) category_share 'SELECT data.%I category, SUM(overlap_fraction) category_share
FROM observatory.%I data, ( FROM observatory.%I data, (
SELECT ST_Area( SELECT ST_Area(
ST_Intersection(%L, a.%I) ST_Intersection($1, a.%I)
) / ST_Area(%L) AS overlap_fraction, a.%I geomref ) / ST_Area($1) AS overlap_fraction, a.%I geomref
FROM observatory.%I as a FROM observatory.%I as a
WHERE %L && a.%I) _overlaps WHERE $1 && a.%I) _overlaps
WHERE data.%I = _overlaps.geomref WHERE data.%I = _overlaps.geomref
GROUP BY category GROUP BY category
ORDER BY SUM(overlap_fraction) DESC ORDER BY SUM(overlap_fraction) DESC
LIMIT 1', LIMIT 1',
colname, data_table, colname, data_table, geom_colname, geom_geomref_colname,
geom, geom_colname, geom, geom_geomref_colname, geom_table, geom_colname, data_geomref_colname)
geom_table, geom, geom_colname, data_geomref_colname) INTO category_val, category_share USING geom;
INTO category_val, category_share;
END IF; END IF;
RETURN category_val; RETURN category_val;
@ -738,10 +736,11 @@ BEGIN
-- TODO use a super-column for global pop -- TODO use a super-column for global pop
population_measure_id := 'us.census.acs.B01003001'; population_measure_id := 'us.census.acs.B01003001';
EXECUTE format('SELECT cdb_observatory.OBS_GetMeasure( EXECUTE 'SELECT cdb_observatory.OBS_GetMeasure(
%L, %L, %L, %L, %L $1, $2, $3, $4, $5
) LIMIT 1', geom, population_measure_id, normalize, boundary_id, time_span) ) LIMIT 1'
INTO result; INTO result
USING geom, population_measure_id, normalize, boundary_id, time_span;
return result; return result;
END; END;
@ -770,27 +769,25 @@ BEGIN
-- we *really* should pass in both geom_table_name and boundary_id -- we *really* should pass in both geom_table_name and boundary_id
-- TODO tablename should not be passed here (use boundary_id) -- TODO tablename should not be passed here (use boundary_id)
EXECUTE EXECUTE
format('SELECT ct.colname 'SELECT ct.colname
FROM observatory.obs_column_to_column c2c, FROM observatory.obs_column_to_column c2c,
observatory.obs_column_table ct, observatory.obs_column_table ct,
observatory.obs_table t observatory.obs_table t
WHERE c2c.reltype = ''geom_ref'' WHERE c2c.reltype = ''geom_ref''
AND ct.column_id = c2c.source_id AND ct.column_id = c2c.source_id
AND ct.table_id = t.id AND ct.table_id = t.id
AND t.tablename = %L' AND t.tablename = $1'
, (data_table_info)[1]->>'tablename') INTO data_geoid_colname USING (data_table_info)[1]->>'tablename';
INTO data_geoid_colname;
EXECUTE EXECUTE
format('SELECT ct.colname 'SELECT ct.colname
FROM observatory.obs_column_to_column c2c, FROM observatory.obs_column_to_column c2c,
observatory.obs_column_table ct, observatory.obs_column_table ct,
observatory.obs_table t observatory.obs_table t
WHERE c2c.reltype = ''geom_ref'' WHERE c2c.reltype = ''geom_ref''
AND ct.column_id = c2c.source_id AND ct.column_id = c2c.source_id
AND ct.table_id = t.id AND ct.table_id = t.id
AND t.tablename = %L' AND t.tablename = $1'
, geom_table_name) INTO geom_geoid_colname USING geom_table_name;
INTO geom_geoid_colname;
q_select := format('SELECT %I, ', data_geoid_colname); q_select := format('SELECT %I, ', data_geoid_colname);
q_sum := 'SELECT Array['; q_sum := 'SELECT Array[';

View File

@ -431,10 +431,7 @@ BEGIN
RETURN QUERY RETURN QUERY
EXECUTE format($string$ EXECUTE format($string$
SELECT SELECT
(1 / (abs(numgeoms - $3) ((100.0 / (1+abs(log(1 + $3) - log(1 + numgeoms)))) * percentfill)::Numeric
--* (1 / Coalesce(NullIf(notnull_percent, 0), 1))
--* (1 / Coalesce(NullIf(percentfill, 0), 0.0001))
))::Numeric
AS score, * AS score, *
FROM ( FROM (
WITH clipped_geom AS ( WITH clipped_geom AS (

View File

@ -352,25 +352,25 @@ AS _obs_getavailablegeometries_foobarbaz_denom_not_in_2010_2014;
SELECT ARRAY_AGG(geom_id ORDER BY score DESC) = SELECT ARRAY_AGG(geom_id ORDER BY score DESC) =
ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract', ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract',
'us.census.tiger.zcta5', 'us.census.tiger.county'] 'us.census.tiger.county', 'us.census.tiger.zcta5']
AS _obs_geometryscores_500m_buffer AS _obs_geometryscores_500m_buffer
FROM cdb_observatory._OBS_GetGeometryScores( FROM cdb_observatory._OBS_GetGeometryScores(
ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 500)::Geometry(Geometry, 4326), ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 500)::Geometry(Geometry, 4326),
ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract', ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract',
'us.census.tiger.zcta5', 'us.census.tiger.county']); 'us.census.tiger.county', 'us.census.tiger.zcta5']);
SELECT ARRAY_AGG(geom_id ORDER BY score DESC) = SELECT ARRAY_AGG(geom_id ORDER BY score DESC) =
ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract', ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract',
'us.census.tiger.zcta5', 'us.census.tiger.county'] 'us.census.tiger.county', 'us.census.tiger.zcta5']
AS _obs_geometryscores_5km_buffer AS _obs_geometryscores_5km_buffer
FROM cdb_observatory._OBS_GetGeometryScores( FROM cdb_observatory._OBS_GetGeometryScores(
ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 5000)::Geometry(Geometry, 4326), ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 5000)::Geometry(Geometry, 4326),
ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract', ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract',
'us.census.tiger.zcta5', 'us.census.tiger.county']); 'us.census.tiger.county', 'us.census.tiger.zcta5']);
SELECT ARRAY_AGG(geom_id ORDER BY score DESC) = SELECT ARRAY_AGG(geom_id ORDER BY score DESC) =
ARRAY['us.census.tiger.census_tract', 'us.census.tiger.zcta5', ARRAY['us.census.tiger.census_tract', 'us.census.tiger.block_group',
'us.census.tiger.county', 'us.census.tiger.block_group'] 'us.census.tiger.zcta5', 'us.census.tiger.county']
AS _obs_geometryscores_50km_buffer AS _obs_geometryscores_50km_buffer
FROM cdb_observatory._OBS_GetGeometryScores( FROM cdb_observatory._OBS_GetGeometryScores(
ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 50000)::Geometry(Geometry, 4326), ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 50000)::Geometry(Geometry, 4326),
@ -378,8 +378,8 @@ SELECT ARRAY_AGG(geom_id ORDER BY score DESC) =
'us.census.tiger.zcta5', 'us.census.tiger.county']); 'us.census.tiger.zcta5', 'us.census.tiger.county']);
SELECT ARRAY_AGG(geom_id ORDER BY score DESC) = SELECT ARRAY_AGG(geom_id ORDER BY score DESC) =
ARRAY[ 'us.census.tiger.county', 'us.census.tiger.zcta5', ARRAY[ 'us.census.tiger.zcta5', 'us.census.tiger.census_tract',
'us.census.tiger.census_tract', 'us.census.tiger.block_group'] 'us.census.tiger.county', 'us.census.tiger.block_group']
AS _obs_geometryscores_500km_buffer AS _obs_geometryscores_500km_buffer
FROM cdb_observatory._OBS_GetGeometryScores( FROM cdb_observatory._OBS_GetGeometryScores(
ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 500000)::Geometry(Geometry, 4326), ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 500000)::Geometry(Geometry, 4326),
@ -436,8 +436,8 @@ SELECT JSON_Object_Agg(geom_id, numgeoms::int ORDER BY numgeoms DESC)::Text =
'us.census.tiger.zcta5', 'us.census.tiger.county']); 'us.census.tiger.zcta5', 'us.census.tiger.county']);
SELECT ARRAY_AGG(geom_id ORDER BY score DESC) = SELECT ARRAY_AGG(geom_id ORDER BY score DESC) =
ARRAY['us.census.tiger.county', 'us.census.tiger.zcta5', ARRAY['us.census.tiger.county', 'us.census.tiger.census_tract',
'us.census.tiger.census_tract', 'us.census.tiger.block_group'] 'us.census.tiger.zcta5', 'us.census.tiger.block_group']
AS _obs_geometryscores_500km_buffer_50_geoms AS _obs_geometryscores_500km_buffer_50_geoms
FROM cdb_observatory._OBS_GetGeometryScores( FROM cdb_observatory._OBS_GetGeometryScores(
ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 50000)::Geometry(Geometry, 4326), ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 50000)::Geometry(Geometry, 4326),
@ -445,8 +445,8 @@ SELECT ARRAY_AGG(geom_id ORDER BY score DESC) =
'us.census.tiger.zcta5', 'us.census.tiger.county'], 50); 'us.census.tiger.zcta5', 'us.census.tiger.county'], 50);
SELECT ARRAY_AGG(geom_id ORDER BY score DESC) SELECT ARRAY_AGG(geom_id ORDER BY score DESC)
= ARRAY['us.census.tiger.zcta5', 'us.census.tiger.county', = ARRAY['us.census.tiger.zcta5', 'us.census.tiger.census_tract',
'us.census.tiger.census_tract', 'us.census.tiger.block_group'] 'us.census.tiger.block_group', 'us.census.tiger.county']
AS _obs_geometryscores_500km_buffer_500_geoms AS _obs_geometryscores_500km_buffer_500_geoms
FROM cdb_observatory._OBS_GetGeometryScores( FROM cdb_observatory._OBS_GetGeometryScores(
ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 50000)::Geometry(Geometry, 4326), ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 50000)::Geometry(Geometry, 4326),
@ -454,8 +454,8 @@ SELECT ARRAY_AGG(geom_id ORDER BY score DESC)
'us.census.tiger.zcta5', 'us.census.tiger.county'], 500); 'us.census.tiger.zcta5', 'us.census.tiger.county'], 500);
SELECT ARRAY_AGG(geom_id ORDER BY score DESC) = SELECT ARRAY_AGG(geom_id ORDER BY score DESC) =
ARRAY['us.census.tiger.census_tract', 'us.census.tiger.zcta5', ARRAY['us.census.tiger.census_tract', 'us.census.tiger.block_group',
'us.census.tiger.county', 'us.census.tiger.block_group'] 'us.census.tiger.zcta5', 'us.census.tiger.county']
AS _obs_geometryscores_500km_buffer_2500_geoms AS _obs_geometryscores_500km_buffer_2500_geoms
FROM cdb_observatory._OBS_GetGeometryScores( FROM cdb_observatory._OBS_GetGeometryScores(
ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 50000)::Geometry(Geometry, 4326), ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 50000)::Geometry(Geometry, 4326),

View File

@ -34,6 +34,7 @@ AND 'us.census.acs.acs' = ANY (subsection_tags)
AND numer_weight > 0 AND numer_weight > 0
''').fetchall() ''').fetchall()
SKIP_COLUMNS = set([ SKIP_COLUMNS = set([
u'mx.inegi_columns.INDI18', u'mx.inegi_columns.INDI18',
u'mx.inegi_columns.ECO40', u'mx.inegi_columns.ECO40',
@ -61,6 +62,10 @@ SKIP_COLUMNS = set([
u'mx.inegi_columns.POB33', u'mx.inegi_columns.POB33',
u'mx.inegi_columns.POB58', u'mx.inegi_columns.POB58',
u'mx.inegi_columns.DISC4', u'mx.inegi_columns.DISC4',
u'mx.inegi_columns.VIV41',
u'mx.inegi_columns.VIV40',
u'mx.inegi_columns.VIV17',
u'mx.inegi_columns.EDU10'
]) ])
#def default_geometry_id(column_id): #def default_geometry_id(column_id):
@ -146,7 +151,8 @@ def default_lonlat(column_id):
return (40.7, -73.9) return (40.7, -73.9)
elif column_id.startswith('eu.'): elif column_id.startswith('eu.'):
raise SkipTest('No tests for Eurostat!') raise SkipTest('No tests for Eurostat!')
#return (52.52207036136366, 13.40606689453125) elif column_id.startswith('br.'):
return (-23.53, -46.63)
else: else:
raise Exception('No catalog point set for {}'.format(column_id)) raise Exception('No catalog point set for {}'.format(column_id))

View File

@ -5,17 +5,47 @@ from util import query, commit
from time import time from time import time
import json
import os
USE_SCHEMA = True USE_SCHEMA = True
for q in ( for q in (
'DROP TABLE IF EXISTS obs_censustest', 'DROP TABLE IF EXISTS obs_perftest_simple',
'''CREATE TABLE obs_censustest (cartodb_id SERIAL PRIMARY KEY, '''CREATE TABLE obs_perftest_simple (cartodb_id SERIAL PRIMARY KEY,
the_geom GEOMETRY, name TEXT, measure NUMERIC, category TEXT)''', point GEOMETRY,
'''INSERT INTO obs_censustest (the_geom, name) geom GEOMETRY,
SELECT * FROM {schema}OBS_GetBoundariesByGeometry( offset_geom GEOMETRY,
name TEXT, measure NUMERIC, category TEXT)''',
'''INSERT INTO obs_perftest_simple (point, geom, offset_geom, name)
SELECT ST_PointOnSurface(the_geom) point,
the_geom geom,
ST_Translate(the_geom, -0.1, 0.1) offset_geom,
geom_refs AS name
FROM (SELECT * FROM {schema}OBS_GetBoundariesByGeometry(
st_makeenvelope(-74.05437469482422,40.66319159533881, st_makeenvelope(-74.05437469482422,40.66319159533881,
-73.81885528564453,40.745696344339564, 4326), -73.81885528564453,40.745696344339564, 4326),
'us.census.tiger.block_group_clipped') As m(the_geom, geoid)''' 'us.census.tiger.census_tract_clipped')) foo
ORDER BY ST_NPoints(the_geom) ASC
LIMIT 500''',
'DROP TABLE IF EXISTS obs_perftest_complex',
'''CREATE TABLE obs_perftest_complex (cartodb_id SERIAL PRIMARY KEY,
point GEOMETRY,
geom GEOMETRY,
offset_geom GEOMETRY,
name TEXT, measure NUMERIC, category TEXT)''',
'''INSERT INTO obs_perftest_complex (point, geom, offset_geom, name)
SELECT ST_PointOnSurface(the_geom) point,
the_geom geom,
ST_Translate(the_geom, -0.1, 0.1) offset_geom,
geom_refs AS name
FROM (SELECT * FROM {schema}OBS_GetBoundariesByGeometry(
st_makeenvelope(-75.05437469482422,40.66319159533881,
-73.81885528564453,41.745696344339564, 4326),
'us.census.tiger.county_clipped')) foo
ORDER BY ST_NPoints(the_geom) DESC
LIMIT 50;''',
#'''SET statement_timeout = 5000;'''
): ):
query(q.format( query(q.format(
schema='cdb_observatory.' if USE_SCHEMA else '', schema='cdb_observatory.' if USE_SCHEMA else '',
@ -24,39 +54,90 @@ for q in (
ARGS = { ARGS = {
'OBS_GetMeasureByID': "name, 'us.census.acs.B01001002', '{}'", ('OBS_GetMeasureByID', None): "name, 'us.census.acs.B01001002', '{}'",
'OBS_GetMeasure': "{}, 'us.census.acs.B01001002'", ('OBS_GetMeasure', 'predenominated'): "{}, 'us.census.acs.B01003001'",
'OBS_GetCategory': "{}, 'us.census.spielman_singleton_segments.X10'", ('OBS_GetMeasure', 'area'): "{}, 'us.census.acs.B01001002', 'area'",
('OBS_GetMeasure', 'denominator'): "{}, 'us.census.acs.B01001002', 'denominator'",
('OBS_GetCategory', None): "{}, 'us.census.spielman_singleton_segments.X10'",
} }
GEOMS = {
'point': 'ST_PointOnSurface(the_geom)', def record(params, results):
'polygon_match': 'the_geom', sha = os.environ['OBS_EXTENSION_SHA']
'polygon_buffered': 'ST_Buffer(the_geom::GEOGRAPHY, 1000)::GEOMETRY(GEOMETRY, 4326)', fpath = os.path.join(os.environ['OBS_PERFTEST_DIR'], sha + '.json')
} if os.path.isfile(fpath):
tests = json.load(open(fpath, 'r'))
else:
tests = {}
with open(fpath, 'w') as fhandle:
tests[json.dumps(params)] = {
'params': params,
'results': results
}
json.dump(tests, fhandle)
@parameterized([ @parameterized([
('OBS_GetMeasureByID', 'us.census.tiger.block_group_clipped'), ('simple', 'OBS_GetMeasureByID', None, 'us.census.tiger.census_tract'),
('OBS_GetMeasureByID', 'us.census.tiger.county'), ('complex', 'OBS_GetMeasureByID', None, 'us.census.tiger.county'),
('OBS_GetMeasure', GEOMS['point']),
('OBS_GetMeasure', GEOMS['polygon_match']), ('simple', 'OBS_GetMeasure', 'predenominated', 'point'),
('OBS_GetMeasure', GEOMS['polygon_buffered']), ('simple', 'OBS_GetMeasure', 'predenominated', 'geom'),
('OBS_GetCategory', GEOMS['point']), ('simple', 'OBS_GetMeasure', 'predenominated', 'offset_geom'),
('OBS_GetCategory', GEOMS['polygon_match']), ('simple', 'OBS_GetMeasure', 'area', 'point'),
('OBS_GetCategory', GEOMS['polygon_buffered']), ('simple', 'OBS_GetMeasure', 'area', 'geom'),
('simple', 'OBS_GetMeasure', 'area', 'offset_geom'),
('simple', 'OBS_GetMeasure', 'denominator', 'point'),
('simple', 'OBS_GetMeasure', 'denominator', 'geom'),
('simple', 'OBS_GetMeasure', 'denominator', 'offset_geom'),
('simple', 'OBS_GetCategory', None, 'point'),
('simple', 'OBS_GetCategory', None, 'geom'),
('simple', 'OBS_GetCategory', None, 'offset_geom'),
('complex', 'OBS_GetMeasure', 'predenominated', 'point'),
('complex', 'OBS_GetMeasure', 'predenominated', 'geom'),
('complex', 'OBS_GetMeasure', 'predenominated', 'offset_geom'),
('complex', 'OBS_GetMeasure', 'area', 'point'),
('complex', 'OBS_GetMeasure', 'area', 'geom'),
('complex', 'OBS_GetMeasure', 'area', 'offset_geom'),
('complex', 'OBS_GetMeasure', 'denominator', 'point'),
('complex', 'OBS_GetMeasure', 'denominator', 'geom'),
('complex', 'OBS_GetMeasure', 'denominator', 'offset_geom'),
('complex', 'OBS_GetCategory', None, 'point'),
('complex', 'OBS_GetCategory', None, 'geom'),
('complex', 'OBS_GetCategory', None, 'offset_geom'),
]) ])
def test_performance(api_method, arg): def test_performance(geom_complexity, api_method, normalization, geom):
print api_method, arg print api_method, geom_complexity, normalization, geom
col = 'measure' if 'measure' in api_method.lower() else 'category' col = 'measure' if 'measure' in api_method.lower() else 'category'
for rows in (1, 10, 50, 100): results = []
q = 'UPDATE obs_censustest SET {col} = {schema}{api_method}({args}) WHERE cartodb_id < {n}'.format(
rownums = (1, 5, 10, ) if geom_complexity == 'complex' else (5, 25, 50 )
for rows in rownums:
stmt = '''UPDATE obs_perftest_{complexity}
SET {col} = {schema}{api_method}({args})
WHERE cartodb_id < {n}'''.format(
col=col, col=col,
complexity=geom_complexity,
schema='cdb_observatory.' if USE_SCHEMA else '', schema='cdb_observatory.' if USE_SCHEMA else '',
api_method=api_method, api_method=api_method,
args=ARGS[api_method].format(arg), args=ARGS[api_method, normalization].format(geom),
n=rows+1) n=rows+1)
start = time() start = time()
query(q) query(stmt)
end = time() end = time()
print rows, ': ', (rows / (end - start)), ' QPS' qps = (rows / (end - start))
results.append({
'rows': rows,
'qps': qps,
'stmt': stmt
})
print rows, ': ', qps, ' QPS'
if 'OBS_RECORD_TEST' in os.environ:
record({
'geom_complexity': geom_complexity,
'api_method': api_method,
'normalization': normalization,
'geom': geom
}, results)