Merge pull request #236 from CartoDB/develop

Release 1.1.6
This commit is contained in:
Mario de Frutos 2016-12-12 16:24:31 +01:00 committed by GitHub
commit f378e75d4c
6 changed files with 256 additions and 149 deletions

26
NEWS.md
View File

@ -1,3 +1,29 @@
1.1.6 (2016-12-08)
__Bugfixes__
* Fix divide by zero condition in "denominator" branch of `OBS_GetMeasure`
when passing in a polygon ([#233](https://github.com/CartoDB/observatory-extension/pull/233)).
__Improvements__
* Use `ST_Subdivide` to improve performance when functions are called on very
complex geometries (with many points) ([#232](https://github.com/CartoDB/observatory-extension/pull/232))
* Improve raster scoring to more heavily weight boundaries with nearer to
correct number of points, and penalize boundaries with lots of blank space
([#232](https://github.com/CartoDB/observatory-extension/pull/232))
* Remove some redundant area calculations in `OBS_GetMeasure`
([#232](https://github.com/CartoDB/observatory-extension/pull/232))
* Replace use of `format('%L', var)` with proper use of `EXECUTE` and `$1` etc.
variables ([#231](https://github.com/CartoDB/observatory-extension/pull/231))
* Add test point for Brazil
([#229](https://github.com/CartoDB/observatory-extension/pull/229))
* Improvements to performance tests
([#229](https://github.com/CartoDB/observatory-extension/pull/229))
- Support simple and complex geometries
- Handle all code branches
- Add ability to persist results to JSON for graph visualization later
1.1.5 (2016-11-29)
__Bugfixes__

View File

@ -232,27 +232,25 @@ BEGIN
-- we *really* should pass in both geom_table_name and boundary_id
-- TODO tablename should not be passed here (use boundary_id)
EXECUTE
format('SELECT ct.colname
'SELECT ct.colname
FROM observatory.obs_column_to_column c2c,
observatory.obs_column_table ct,
observatory.obs_table t
WHERE c2c.reltype = ''geom_ref''
AND ct.column_id = c2c.source_id
AND ct.table_id = t.id
AND t.tablename = %L'
, (data_table_info)[1]->>'tablename')
INTO data_geoid_colname;
AND t.tablename = $1'
INTO data_geoid_colname USING (data_table_info)[1]->>'tablename';
EXECUTE
format('SELECT ct.colname
'SELECT ct.colname
FROM observatory.obs_column_to_column c2c,
observatory.obs_column_table ct,
observatory.obs_table t
WHERE c2c.reltype = ''geom_ref''
AND ct.column_id = c2c.source_id
AND ct.table_id = t.id
AND t.tablename = %L'
, geom_table_name)
INTO geom_geoid_colname;
AND t.tablename = $1'
INTO geom_geoid_colname USING geom_table_name;
EXECUTE
format('SELECT %I
@ -268,11 +266,10 @@ BEGIN
EXECUTE
format('SELECT ST_Area(the_geom::geography) / (1000 * 1000)
FROM observatory.%I
WHERE %I = %L',
WHERE %I = $1',
geom_table_name,
geom_geoid_colname,
geoid)
INTO area;
geom_geoid_colname)
INTO area USING geoid;
IF area IS NULL
THEN
@ -343,7 +340,8 @@ CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetMeasure(
measure_id TEXT,
normalize TEXT DEFAULT NULL,
boundary_id TEXT DEFAULT NULL,
time_span TEXT DEFAULT NULL
time_span TEXT DEFAULT NULL,
simplification NUMERIC DEFAULT 0.00001
)
RETURNS NUMERIC
AS $$
@ -369,13 +367,16 @@ BEGIN
RETURN NULL;
END IF;
geom := ST_SnapToGrid(geom, 0.000001);
IF simplification IS NOT NULL THEN
geom := ST_Simplify(geom, simplification);
END IF;
IF ST_GeometryType(geom) = 'ST_Point' THEN
geom_type := 'point';
ELSIF ST_GeometryType(geom) IN ('ST_Polygon', 'ST_MultiPolygon') THEN
geom_type := 'polygon';
geom := ST_Buffer(geom, 0.000001);
--geom := ST_Buffer(geom, 0.000001);
geom := ST_CollectionExtract(ST_MakeValid(geom), 3);
ELSE
RAISE EXCEPTION 'Invalid geometry type (%), can only handle ''ST_Point'', ''ST_Polygon'', and ''ST_MultiPolygon''',
ST_GeometryType(geom);
@ -435,103 +436,101 @@ BEGIN
IF map_type = 'areaNormalized' THEN
sql = format('WITH _geom AS (SELECT ST_Area(geom.%I::Geography) / 1000000 area, geom.%I geom_ref
FROM observatory.%I geom
WHERE ST_Within(%L, geom.%I)
WHERE ST_Within($1, geom.%I)
LIMIT 1)
SELECT numer.%I / (SELECT area FROM _geom)
FROM observatory.%I numer
WHERE numer.%I = (SELECT geom_ref FROM _geom)',
geom_colname, geom_geomref_colname, geom_tablename,
geom, geom_colname, numer_colname, numer_tablename,
geom_colname, numer_colname, numer_tablename,
numer_geomref_colname);
ELSIF map_type = 'denominated' THEN
sql = format('SELECT numer.%I / NULLIF((SELECT denom.%I FROM observatory.%I denom WHERE denom.%I = numer.%I LIMIT 1), 0)
FROM observatory.%I numer
WHERE numer.%I = (SELECT geom.%I FROM observatory.%I geom WHERE ST_Within(%L, geom.%I) LIMIT 1)',
WHERE numer.%I =
(SELECT geom.%I
FROM observatory.%I geom
WHERE ST_Within($1, geom.%I) LIMIT 1)',
numer_colname, denom_colname, denom_tablename,
denom_geomref_colname, numer_geomref_colname,
numer_tablename,
numer_geomref_colname, geom_geomref_colname,
geom_tablename, geom, geom_colname);
numer_tablename, numer_geomref_colname,
geom_geomref_colname, geom_tablename, geom_colname);
ELSIF map_type = 'predenominated' THEN
sql = format('SELECT numer.%I
FROM observatory.%I numer
WHERE numer.%I = (SELECT geom.%I FROM observatory.%I geom WHERE ST_Within(%L, geom.%I) LIMIT 1)',
numer_colname, numer_tablename,
numer_geomref_colname, geom_geomref_colname, geom_tablename,
geom, geom_colname);
WHERE numer.%I =
(SELECT geom.%I
FROM observatory.%I geom
WHERE ST_Within($1, geom.%I) LIMIT 1)',
numer_colname, numer_tablename, numer_geomref_colname,
geom_geomref_colname, geom_tablename, geom_colname);
END IF;
ELSIF geom_type = 'polygon' THEN
IF map_type = 'areaNormalized' THEN
sql = format('WITH _geom AS (SELECT ST_Area(ST_Intersection(%L, geom.%I))
/ ST_Area(geom.%I) overlap, geom.%I geom_ref
FROM observatory.%I geom
WHERE ST_Intersects(%L, geom.%I)
AND ST_Area(ST_Intersection(%L, geom.%I)) / ST_Area(geom.%I) > 0)
sql = format('WITH _subdivided AS (
SELECT ST_Subdivide($1) AS geom
), _geom AS (SELECT SUM(ST_Area(ST_Intersection(s.geom, geom.%I)))
/ ST_Area(cdb_observatory.FIRST(geom.%I)) overlap, geom.%I geom_ref
FROM observatory.%I geom, _subdivided s
WHERE ST_Intersects(s.geom, geom.%I)
GROUP BY geom.%I)
SELECT SUM(numer.%I * (SELECT _geom.overlap FROM _geom WHERE _geom.geom_ref = numer.%I)) /
(ST_Area(%L::Geography) / 1000000)
(ST_Area($1::Geography) / 1000000)
FROM observatory.%I numer
WHERE numer.%I = ANY ((SELECT ARRAY_AGG(geom_ref) FROM _geom)::TEXT[])',
geom, geom_colname, geom_colname,
geom_geomref_colname, geom_tablename,
geom, geom_colname,
geom, geom_colname, geom_colname,
numer_colname, numer_geomref_colname,
geom, numer_tablename,
numer_geomref_colname);
geom_colname, geom_colname, geom_geomref_colname, geom_tablename,
geom_colname, geom_geomref_colname, numer_colname,
numer_geomref_colname, numer_tablename, numer_geomref_colname);
ELSIF map_type = 'denominated' THEN
sql = format('WITH _geom AS (SELECT ST_Area(ST_Intersection(%L, geom.%I))
/ ST_Area(geom.%I) overlap, geom.%I geom_ref
FROM observatory.%I geom
WHERE ST_Intersects(%L, geom.%I)
AND ST_Area(ST_Intersection(%L, geom.%I)) / ST_Area(geom.%I) > 0),
sql = format('WITH _subdivided AS (
SELECT ST_Subdivide($1) AS geom
), _geom AS (SELECT SUM(ST_Area(ST_Intersection(s.geom, geom.%I)))
/ ST_Area(cdb_observatory.FIRST(geom.%I)) overlap, geom.%I geom_ref
FROM observatory.%I geom, _subdivided s
WHERE ST_Intersects(s.geom, geom.%I)
GROUP BY geom.%I),
_denom AS (SELECT denom.%I, denom.%I geom_ref
FROM observatory.%I denom
WHERE denom.%I = ANY ((SELECT ARRAY_AGG(geom_ref) FROM _geom)::TEXT[]))
SELECT SUM(numer.%I * (SELECT _geom.overlap FROM _geom WHERE _geom.geom_ref = numer.%I)) /
SUM((SELECT _denom.%I * (SELECT _geom.overlap
NullIf(SUM((SELECT _denom.%I * (SELECT _geom.overlap
FROM _geom
WHERE _geom.geom_ref = _denom.geom_ref)
FROM _denom WHERE _denom.geom_ref = numer.%I))
FROM _denom WHERE _denom.geom_ref = numer.%I)), 0)
FROM observatory.%I numer
WHERE numer.%I = ANY ((SELECT ARRAY_AGG(geom_ref) FROM _geom)::TEXT[])',
geom, geom_colname,
geom_colname, geom_geomref_colname,
geom_tablename,
geom, geom_colname,
geom, geom_colname, geom_colname,
denom_colname, denom_geomref_colname,
denom_tablename,
denom_geomref_colname,
numer_colname, numer_geomref_colname,
denom_colname,
numer_geomref_colname,
numer_tablename,
numer_geomref_colname);
geom_colname, geom_colname, geom_geomref_colname,
geom_tablename, geom_colname, geom_geomref_colname,
denom_colname, denom_geomref_colname, denom_tablename,
denom_geomref_colname, numer_colname, numer_geomref_colname,
denom_colname, numer_geomref_colname,
numer_tablename, numer_geomref_colname);
ELSIF map_type = 'predenominated' THEN
IF numer_aggregate NOT ILIKE 'sum' THEN
RAISE EXCEPTION 'Cannot calculate "%" (%) for custom area as it cannot be summed, use ST_PointOnSurface instead',
numer_name, measure_id;
ELSE
sql = format('WITH _geom AS (SELECT ST_Area(ST_Intersection(%L, geom.%I))
/ ST_Area(geom.%I) overlap, geom.%I geom_ref
FROM observatory.%I geom
WHERE ST_Intersects(%L, geom.%I)
AND ST_Area(ST_Intersection(%L, geom.%I)) / ST_Area(geom.%I) > 0)
sql = format('WITH _subdivided AS (
SELECT ST_Subdivide($1) AS geom
), _geom AS (SELECT SUM(ST_Area(ST_Intersection(s.geom, geom.%I)))
/ ST_Area(cdb_observatory.FIRST(geom.%I)) overlap,
geom.%I geom_ref
FROM observatory.%I geom, _subdivided s
WHERE ST_Intersects(s.geom, geom.%I)
GROUP BY geom.%I
)
SELECT SUM(numer.%I * (SELECT _geom.overlap FROM _geom WHERE _geom.geom_ref = numer.%I))
FROM observatory.%I numer
WHERE numer.%I = ANY ((SELECT ARRAY_AGG(geom_ref) FROM _geom)::TEXT[])',
geom, geom_colname, geom_colname,
geom_geomref_colname, geom_tablename,
geom, geom_colname,
geom, geom_colname, geom_colname,
numer_colname, numer_geomref_colname,
numer_tablename,
geom_colname, geom_colname, geom_geomref_colname,
geom_tablename, geom_colname, geom_geomref_colname,
numer_colname, numer_geomref_colname, numer_tablename,
numer_geomref_colname);
END IF;
END IF;
END IF;
EXECUTE sql INTO result;
EXECUTE sql INTO result USING geom;
RETURN result;
END;
@ -574,11 +573,11 @@ BEGIN
EXECUTE format(
'SELECT %I
FROM observatory.%I data
WHERE data.%I = %L',
WHERE data.%I = $1',
colname,
target_table,
data_geoid_colname, geom_ref)
INTO measure_val;
data_geoid_colname)
INTO measure_val USING geom_ref;
RETURN measure_val;
@ -627,28 +626,27 @@ BEGIN
'SELECT data.%I
FROM observatory.%I data, observatory.%I geom
WHERE data.%I = geom.%I
AND ST_WITHIN(%L, geom.%I) ',
AND ST_WITHIN($1, geom.%I) ',
colname, data_table, geom_table, data_geomref_colname,
geom_geomref_colname, geom, geom_colname)
INTO category_val;
geom_geomref_colname, geom_colname)
INTO category_val USING geom;
ELSE
-- favor the category with the most area
EXECUTE format(
'SELECT data.%I category, SUM(overlap_fraction) category_share
FROM observatory.%I data, (
SELECT ST_Area(
ST_Intersection(%L, a.%I)
) / ST_Area(%L) AS overlap_fraction, a.%I geomref
ST_Intersection($1, a.%I)
) / ST_Area($1) AS overlap_fraction, a.%I geomref
FROM observatory.%I as a
WHERE %L && a.%I) _overlaps
WHERE $1 && a.%I) _overlaps
WHERE data.%I = _overlaps.geomref
GROUP BY category
ORDER BY SUM(overlap_fraction) DESC
LIMIT 1',
colname, data_table,
geom, geom_colname, geom, geom_geomref_colname,
geom_table, geom, geom_colname, data_geomref_colname)
INTO category_val, category_share;
colname, data_table, geom_colname, geom_geomref_colname,
geom_table, geom_colname, data_geomref_colname)
INTO category_val, category_share USING geom;
END IF;
RETURN category_val;
@ -738,10 +736,11 @@ BEGIN
-- TODO use a super-column for global pop
population_measure_id := 'us.census.acs.B01003001';
EXECUTE format('SELECT cdb_observatory.OBS_GetMeasure(
%L, %L, %L, %L, %L
) LIMIT 1', geom, population_measure_id, normalize, boundary_id, time_span)
INTO result;
EXECUTE 'SELECT cdb_observatory.OBS_GetMeasure(
$1, $2, $3, $4, $5
) LIMIT 1'
INTO result
USING geom, population_measure_id, normalize, boundary_id, time_span;
return result;
END;
@ -770,27 +769,25 @@ BEGIN
-- we *really* should pass in both geom_table_name and boundary_id
-- TODO tablename should not be passed here (use boundary_id)
EXECUTE
format('SELECT ct.colname
'SELECT ct.colname
FROM observatory.obs_column_to_column c2c,
observatory.obs_column_table ct,
observatory.obs_table t
WHERE c2c.reltype = ''geom_ref''
AND ct.column_id = c2c.source_id
AND ct.table_id = t.id
AND t.tablename = %L'
, (data_table_info)[1]->>'tablename')
INTO data_geoid_colname;
AND t.tablename = $1'
INTO data_geoid_colname USING (data_table_info)[1]->>'tablename';
EXECUTE
format('SELECT ct.colname
'SELECT ct.colname
FROM observatory.obs_column_to_column c2c,
observatory.obs_column_table ct,
observatory.obs_table t
WHERE c2c.reltype = ''geom_ref''
AND ct.column_id = c2c.source_id
AND ct.table_id = t.id
AND t.tablename = %L'
, geom_table_name)
INTO geom_geoid_colname;
AND t.tablename = $1'
INTO geom_geoid_colname USING geom_table_name;
q_select := format('SELECT %I, ', data_geoid_colname);
q_sum := 'SELECT Array[';

View File

@ -431,10 +431,7 @@ BEGIN
RETURN QUERY
EXECUTE format($string$
SELECT
(1 / (abs(numgeoms - $3)
--* (1 / Coalesce(NullIf(notnull_percent, 0), 1))
--* (1 / Coalesce(NullIf(percentfill, 0), 0.0001))
))::Numeric
((100.0 / (1+abs(log(1 + $3) - log(1 + numgeoms)))) * percentfill)::Numeric
AS score, *
FROM (
WITH clipped_geom AS (

View File

@ -352,25 +352,25 @@ AS _obs_getavailablegeometries_foobarbaz_denom_not_in_2010_2014;
SELECT ARRAY_AGG(geom_id ORDER BY score DESC) =
ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract',
'us.census.tiger.zcta5', 'us.census.tiger.county']
'us.census.tiger.county', 'us.census.tiger.zcta5']
AS _obs_geometryscores_500m_buffer
FROM cdb_observatory._OBS_GetGeometryScores(
ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 500)::Geometry(Geometry, 4326),
ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract',
'us.census.tiger.zcta5', 'us.census.tiger.county']);
'us.census.tiger.county', 'us.census.tiger.zcta5']);
SELECT ARRAY_AGG(geom_id ORDER BY score DESC) =
ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract',
'us.census.tiger.zcta5', 'us.census.tiger.county']
'us.census.tiger.county', 'us.census.tiger.zcta5']
AS _obs_geometryscores_5km_buffer
FROM cdb_observatory._OBS_GetGeometryScores(
ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 5000)::Geometry(Geometry, 4326),
ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract',
'us.census.tiger.zcta5', 'us.census.tiger.county']);
'us.census.tiger.county', 'us.census.tiger.zcta5']);
SELECT ARRAY_AGG(geom_id ORDER BY score DESC) =
ARRAY['us.census.tiger.census_tract', 'us.census.tiger.zcta5',
'us.census.tiger.county', 'us.census.tiger.block_group']
ARRAY['us.census.tiger.census_tract', 'us.census.tiger.block_group',
'us.census.tiger.zcta5', 'us.census.tiger.county']
AS _obs_geometryscores_50km_buffer
FROM cdb_observatory._OBS_GetGeometryScores(
ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 50000)::Geometry(Geometry, 4326),
@ -378,8 +378,8 @@ SELECT ARRAY_AGG(geom_id ORDER BY score DESC) =
'us.census.tiger.zcta5', 'us.census.tiger.county']);
SELECT ARRAY_AGG(geom_id ORDER BY score DESC) =
ARRAY[ 'us.census.tiger.county', 'us.census.tiger.zcta5',
'us.census.tiger.census_tract', 'us.census.tiger.block_group']
ARRAY[ 'us.census.tiger.zcta5', 'us.census.tiger.census_tract',
'us.census.tiger.county', 'us.census.tiger.block_group']
AS _obs_geometryscores_500km_buffer
FROM cdb_observatory._OBS_GetGeometryScores(
ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 500000)::Geometry(Geometry, 4326),
@ -436,8 +436,8 @@ SELECT JSON_Object_Agg(geom_id, numgeoms::int ORDER BY numgeoms DESC)::Text =
'us.census.tiger.zcta5', 'us.census.tiger.county']);
SELECT ARRAY_AGG(geom_id ORDER BY score DESC) =
ARRAY['us.census.tiger.county', 'us.census.tiger.zcta5',
'us.census.tiger.census_tract', 'us.census.tiger.block_group']
ARRAY['us.census.tiger.county', 'us.census.tiger.census_tract',
'us.census.tiger.zcta5', 'us.census.tiger.block_group']
AS _obs_geometryscores_500km_buffer_50_geoms
FROM cdb_observatory._OBS_GetGeometryScores(
ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 50000)::Geometry(Geometry, 4326),
@ -445,8 +445,8 @@ SELECT ARRAY_AGG(geom_id ORDER BY score DESC) =
'us.census.tiger.zcta5', 'us.census.tiger.county'], 50);
SELECT ARRAY_AGG(geom_id ORDER BY score DESC)
= ARRAY['us.census.tiger.zcta5', 'us.census.tiger.county',
'us.census.tiger.census_tract', 'us.census.tiger.block_group']
= ARRAY['us.census.tiger.zcta5', 'us.census.tiger.census_tract',
'us.census.tiger.block_group', 'us.census.tiger.county']
AS _obs_geometryscores_500km_buffer_500_geoms
FROM cdb_observatory._OBS_GetGeometryScores(
ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 50000)::Geometry(Geometry, 4326),
@ -454,8 +454,8 @@ SELECT ARRAY_AGG(geom_id ORDER BY score DESC)
'us.census.tiger.zcta5', 'us.census.tiger.county'], 500);
SELECT ARRAY_AGG(geom_id ORDER BY score DESC) =
ARRAY['us.census.tiger.census_tract', 'us.census.tiger.zcta5',
'us.census.tiger.county', 'us.census.tiger.block_group']
ARRAY['us.census.tiger.census_tract', 'us.census.tiger.block_group',
'us.census.tiger.zcta5', 'us.census.tiger.county']
AS _obs_geometryscores_500km_buffer_2500_geoms
FROM cdb_observatory._OBS_GetGeometryScores(
ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 50000)::Geometry(Geometry, 4326),

View File

@ -34,6 +34,7 @@ AND 'us.census.acs.acs' = ANY (subsection_tags)
AND numer_weight > 0
''').fetchall()
SKIP_COLUMNS = set([
u'mx.inegi_columns.INDI18',
u'mx.inegi_columns.ECO40',
@ -61,6 +62,10 @@ SKIP_COLUMNS = set([
u'mx.inegi_columns.POB33',
u'mx.inegi_columns.POB58',
u'mx.inegi_columns.DISC4',
u'mx.inegi_columns.VIV41',
u'mx.inegi_columns.VIV40',
u'mx.inegi_columns.VIV17',
u'mx.inegi_columns.EDU10'
])
#def default_geometry_id(column_id):
@ -146,7 +151,8 @@ def default_lonlat(column_id):
return (40.7, -73.9)
elif column_id.startswith('eu.'):
raise SkipTest('No tests for Eurostat!')
#return (52.52207036136366, 13.40606689453125)
elif column_id.startswith('br.'):
return (-23.53, -46.63)
else:
raise Exception('No catalog point set for {}'.format(column_id))

View File

@ -5,17 +5,47 @@ from util import query, commit
from time import time
import json
import os
USE_SCHEMA = True
for q in (
'DROP TABLE IF EXISTS obs_censustest',
'''CREATE TABLE obs_censustest (cartodb_id SERIAL PRIMARY KEY,
the_geom GEOMETRY, name TEXT, measure NUMERIC, category TEXT)''',
'''INSERT INTO obs_censustest (the_geom, name)
SELECT * FROM {schema}OBS_GetBoundariesByGeometry(
'DROP TABLE IF EXISTS obs_perftest_simple',
'''CREATE TABLE obs_perftest_simple (cartodb_id SERIAL PRIMARY KEY,
point GEOMETRY,
geom GEOMETRY,
offset_geom GEOMETRY,
name TEXT, measure NUMERIC, category TEXT)''',
'''INSERT INTO obs_perftest_simple (point, geom, offset_geom, name)
SELECT ST_PointOnSurface(the_geom) point,
the_geom geom,
ST_Translate(the_geom, -0.1, 0.1) offset_geom,
geom_refs AS name
FROM (SELECT * FROM {schema}OBS_GetBoundariesByGeometry(
st_makeenvelope(-74.05437469482422,40.66319159533881,
-73.81885528564453,40.745696344339564, 4326),
'us.census.tiger.block_group_clipped') As m(the_geom, geoid)'''
'us.census.tiger.census_tract_clipped')) foo
ORDER BY ST_NPoints(the_geom) ASC
LIMIT 500''',
'DROP TABLE IF EXISTS obs_perftest_complex',
'''CREATE TABLE obs_perftest_complex (cartodb_id SERIAL PRIMARY KEY,
point GEOMETRY,
geom GEOMETRY,
offset_geom GEOMETRY,
name TEXT, measure NUMERIC, category TEXT)''',
'''INSERT INTO obs_perftest_complex (point, geom, offset_geom, name)
SELECT ST_PointOnSurface(the_geom) point,
the_geom geom,
ST_Translate(the_geom, -0.1, 0.1) offset_geom,
geom_refs AS name
FROM (SELECT * FROM {schema}OBS_GetBoundariesByGeometry(
st_makeenvelope(-75.05437469482422,40.66319159533881,
-73.81885528564453,41.745696344339564, 4326),
'us.census.tiger.county_clipped')) foo
ORDER BY ST_NPoints(the_geom) DESC
LIMIT 50;''',
#'''SET statement_timeout = 5000;'''
):
query(q.format(
schema='cdb_observatory.' if USE_SCHEMA else '',
@ -24,39 +54,90 @@ for q in (
ARGS = {
'OBS_GetMeasureByID': "name, 'us.census.acs.B01001002', '{}'",
'OBS_GetMeasure': "{}, 'us.census.acs.B01001002'",
'OBS_GetCategory': "{}, 'us.census.spielman_singleton_segments.X10'",
('OBS_GetMeasureByID', None): "name, 'us.census.acs.B01001002', '{}'",
('OBS_GetMeasure', 'predenominated'): "{}, 'us.census.acs.B01003001'",
('OBS_GetMeasure', 'area'): "{}, 'us.census.acs.B01001002', 'area'",
('OBS_GetMeasure', 'denominator'): "{}, 'us.census.acs.B01001002', 'denominator'",
('OBS_GetCategory', None): "{}, 'us.census.spielman_singleton_segments.X10'",
}
GEOMS = {
'point': 'ST_PointOnSurface(the_geom)',
'polygon_match': 'the_geom',
'polygon_buffered': 'ST_Buffer(the_geom::GEOGRAPHY, 1000)::GEOMETRY(GEOMETRY, 4326)',
}
def record(params, results):
sha = os.environ['OBS_EXTENSION_SHA']
fpath = os.path.join(os.environ['OBS_PERFTEST_DIR'], sha + '.json')
if os.path.isfile(fpath):
tests = json.load(open(fpath, 'r'))
else:
tests = {}
with open(fpath, 'w') as fhandle:
tests[json.dumps(params)] = {
'params': params,
'results': results
}
json.dump(tests, fhandle)
@parameterized([
('OBS_GetMeasureByID', 'us.census.tiger.block_group_clipped'),
('OBS_GetMeasureByID', 'us.census.tiger.county'),
('OBS_GetMeasure', GEOMS['point']),
('OBS_GetMeasure', GEOMS['polygon_match']),
('OBS_GetMeasure', GEOMS['polygon_buffered']),
('OBS_GetCategory', GEOMS['point']),
('OBS_GetCategory', GEOMS['polygon_match']),
('OBS_GetCategory', GEOMS['polygon_buffered']),
('simple', 'OBS_GetMeasureByID', None, 'us.census.tiger.census_tract'),
('complex', 'OBS_GetMeasureByID', None, 'us.census.tiger.county'),
('simple', 'OBS_GetMeasure', 'predenominated', 'point'),
('simple', 'OBS_GetMeasure', 'predenominated', 'geom'),
('simple', 'OBS_GetMeasure', 'predenominated', 'offset_geom'),
('simple', 'OBS_GetMeasure', 'area', 'point'),
('simple', 'OBS_GetMeasure', 'area', 'geom'),
('simple', 'OBS_GetMeasure', 'area', 'offset_geom'),
('simple', 'OBS_GetMeasure', 'denominator', 'point'),
('simple', 'OBS_GetMeasure', 'denominator', 'geom'),
('simple', 'OBS_GetMeasure', 'denominator', 'offset_geom'),
('simple', 'OBS_GetCategory', None, 'point'),
('simple', 'OBS_GetCategory', None, 'geom'),
('simple', 'OBS_GetCategory', None, 'offset_geom'),
('complex', 'OBS_GetMeasure', 'predenominated', 'point'),
('complex', 'OBS_GetMeasure', 'predenominated', 'geom'),
('complex', 'OBS_GetMeasure', 'predenominated', 'offset_geom'),
('complex', 'OBS_GetMeasure', 'area', 'point'),
('complex', 'OBS_GetMeasure', 'area', 'geom'),
('complex', 'OBS_GetMeasure', 'area', 'offset_geom'),
('complex', 'OBS_GetMeasure', 'denominator', 'point'),
('complex', 'OBS_GetMeasure', 'denominator', 'geom'),
('complex', 'OBS_GetMeasure', 'denominator', 'offset_geom'),
('complex', 'OBS_GetCategory', None, 'point'),
('complex', 'OBS_GetCategory', None, 'geom'),
('complex', 'OBS_GetCategory', None, 'offset_geom'),
])
def test_performance(api_method, arg):
print api_method, arg
def test_performance(geom_complexity, api_method, normalization, geom):
print api_method, geom_complexity, normalization, geom
col = 'measure' if 'measure' in api_method.lower() else 'category'
for rows in (1, 10, 50, 100):
q = 'UPDATE obs_censustest SET {col} = {schema}{api_method}({args}) WHERE cartodb_id < {n}'.format(
results = []
rownums = (1, 5, 10, ) if geom_complexity == 'complex' else (5, 25, 50 )
for rows in rownums:
stmt = '''UPDATE obs_perftest_{complexity}
SET {col} = {schema}{api_method}({args})
WHERE cartodb_id < {n}'''.format(
col=col,
complexity=geom_complexity,
schema='cdb_observatory.' if USE_SCHEMA else '',
api_method=api_method,
args=ARGS[api_method].format(arg),
args=ARGS[api_method, normalization].format(geom),
n=rows+1)
start = time()
query(q)
query(stmt)
end = time()
print rows, ': ', (rows / (end - start)), ' QPS'
qps = (rows / (end - start))
results.append({
'rows': rows,
'qps': qps,
'stmt': stmt
})
print rows, ': ', qps, ' QPS'
if 'OBS_RECORD_TEST' in os.environ:
record({
'geom_complexity': geom_complexity,
'api_method': api_method,
'normalization': normalization,
'geom': geom
}, results)