diff --git a/NEWS.md b/NEWS.md index abd33a6..073b373 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,29 @@ +1.1.6 (2016-12-08) + +__Bugfixes__ + +* Fix divide by zero condition in "denominator" branch of `OBS_GetMeasure` + when passing in a polygon ([#233](https://github.com/CartoDB/observatory-extension/pull/233)). + +__Improvements__ + +* Use `ST_Subdivide` to improve performance when functions are called on very + complex geometries (with many points) ([#232](https://github.com/CartoDB/observatory-extension/pull/232)) +* Improve raster scoring to more heavily weight boundaries with nearer to + correct number of points, and penalize boundaries with lots of blank space + ([#232](https://github.com/CartoDB/observatory-extension/pull/232)) +* Remove some redundant area calculations in `OBS_GetMeasure` + ([#232](https://github.com/CartoDB/observatory-extension/pull/232)) +* Replace use of `format('%L', var)` with proper use of `EXECUTE` and `$1` etc. + variables ([#231](https://github.com/CartoDB/observatory-extension/pull/231)) +* Add test point for Brazil + ([#229](https://github.com/CartoDB/observatory-extension/pull/229)) +* Improvements to performance tests + ([#229](https://github.com/CartoDB/observatory-extension/pull/229)) + - Support simple and complex geometries + - Handle all code branches + - Add ability to persist results to JSON for graph visualization later + 1.1.5 (2016-11-29) __Bugfixes__ diff --git a/src/pg/sql/41_observatory_augmentation.sql b/src/pg/sql/41_observatory_augmentation.sql index 05981f2..8e964cd 100644 --- a/src/pg/sql/41_observatory_augmentation.sql +++ b/src/pg/sql/41_observatory_augmentation.sql @@ -232,27 +232,25 @@ BEGIN -- we *really* should pass in both geom_table_name and boundary_id -- TODO tablename should not be passed here (use boundary_id) EXECUTE - format('SELECT ct.colname + 'SELECT ct.colname FROM observatory.obs_column_to_column c2c, observatory.obs_column_table ct, observatory.obs_table t WHERE c2c.reltype = ''geom_ref'' AND ct.column_id = c2c.source_id AND ct.table_id = t.id - AND t.tablename = %L' - , (data_table_info)[1]->>'tablename') - INTO data_geoid_colname; + AND t.tablename = $1' + INTO data_geoid_colname USING (data_table_info)[1]->>'tablename'; EXECUTE - format('SELECT ct.colname + 'SELECT ct.colname FROM observatory.obs_column_to_column c2c, observatory.obs_column_table ct, observatory.obs_table t WHERE c2c.reltype = ''geom_ref'' AND ct.column_id = c2c.source_id AND ct.table_id = t.id - AND t.tablename = %L' - , geom_table_name) - INTO geom_geoid_colname; + AND t.tablename = $1' + INTO geom_geoid_colname USING geom_table_name; EXECUTE format('SELECT %I @@ -268,11 +266,10 @@ BEGIN EXECUTE format('SELECT ST_Area(the_geom::geography) / (1000 * 1000) FROM observatory.%I - WHERE %I = %L', + WHERE %I = $1', geom_table_name, - geom_geoid_colname, - geoid) - INTO area; + geom_geoid_colname) + INTO area USING geoid; IF area IS NULL THEN @@ -343,7 +340,8 @@ CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetMeasure( measure_id TEXT, normalize TEXT DEFAULT NULL, boundary_id TEXT DEFAULT NULL, - time_span TEXT DEFAULT NULL + time_span TEXT DEFAULT NULL, + simplification NUMERIC DEFAULT 0.00001 ) RETURNS NUMERIC AS $$ @@ -369,13 +367,16 @@ BEGIN RETURN NULL; END IF; - geom := ST_SnapToGrid(geom, 0.000001); + IF simplification IS NOT NULL THEN + geom := ST_Simplify(geom, simplification); + END IF; IF ST_GeometryType(geom) = 'ST_Point' THEN geom_type := 'point'; ELSIF ST_GeometryType(geom) IN ('ST_Polygon', 'ST_MultiPolygon') THEN geom_type := 'polygon'; - geom := ST_Buffer(geom, 0.000001); + --geom := ST_Buffer(geom, 0.000001); + geom := ST_CollectionExtract(ST_MakeValid(geom), 3); ELSE RAISE EXCEPTION 'Invalid geometry type (%), can only handle ''ST_Point'', ''ST_Polygon'', and ''ST_MultiPolygon''', ST_GeometryType(geom); @@ -435,103 +436,101 @@ BEGIN IF map_type = 'areaNormalized' THEN sql = format('WITH _geom AS (SELECT ST_Area(geom.%I::Geography) / 1000000 area, geom.%I geom_ref FROM observatory.%I geom - WHERE ST_Within(%L, geom.%I) + WHERE ST_Within($1, geom.%I) LIMIT 1) SELECT numer.%I / (SELECT area FROM _geom) FROM observatory.%I numer WHERE numer.%I = (SELECT geom_ref FROM _geom)', geom_colname, geom_geomref_colname, geom_tablename, - geom, geom_colname, numer_colname, numer_tablename, + geom_colname, numer_colname, numer_tablename, numer_geomref_colname); ELSIF map_type = 'denominated' THEN sql = format('SELECT numer.%I / NULLIF((SELECT denom.%I FROM observatory.%I denom WHERE denom.%I = numer.%I LIMIT 1), 0) FROM observatory.%I numer - WHERE numer.%I = (SELECT geom.%I FROM observatory.%I geom WHERE ST_Within(%L, geom.%I) LIMIT 1)', + WHERE numer.%I = + (SELECT geom.%I + FROM observatory.%I geom + WHERE ST_Within($1, geom.%I) LIMIT 1)', numer_colname, denom_colname, denom_tablename, denom_geomref_colname, numer_geomref_colname, - numer_tablename, - numer_geomref_colname, geom_geomref_colname, - geom_tablename, geom, geom_colname); + numer_tablename, numer_geomref_colname, + geom_geomref_colname, geom_tablename, geom_colname); ELSIF map_type = 'predenominated' THEN sql = format('SELECT numer.%I FROM observatory.%I numer - WHERE numer.%I = (SELECT geom.%I FROM observatory.%I geom WHERE ST_Within(%L, geom.%I) LIMIT 1)', - numer_colname, numer_tablename, - numer_geomref_colname, geom_geomref_colname, geom_tablename, - geom, geom_colname); + WHERE numer.%I = + (SELECT geom.%I + FROM observatory.%I geom + WHERE ST_Within($1, geom.%I) LIMIT 1)', + numer_colname, numer_tablename, numer_geomref_colname, + geom_geomref_colname, geom_tablename, geom_colname); END IF; ELSIF geom_type = 'polygon' THEN IF map_type = 'areaNormalized' THEN - sql = format('WITH _geom AS (SELECT ST_Area(ST_Intersection(%L, geom.%I)) - / ST_Area(geom.%I) overlap, geom.%I geom_ref - FROM observatory.%I geom - WHERE ST_Intersects(%L, geom.%I) - AND ST_Area(ST_Intersection(%L, geom.%I)) / ST_Area(geom.%I) > 0) + sql = format('WITH _subdivided AS ( + SELECT ST_Subdivide($1) AS geom + ), _geom AS (SELECT SUM(ST_Area(ST_Intersection(s.geom, geom.%I))) + / ST_Area(cdb_observatory.FIRST(geom.%I)) overlap, geom.%I geom_ref + FROM observatory.%I geom, _subdivided s + WHERE ST_Intersects(s.geom, geom.%I) + GROUP BY geom.%I) SELECT SUM(numer.%I * (SELECT _geom.overlap FROM _geom WHERE _geom.geom_ref = numer.%I)) / - (ST_Area(%L::Geography) / 1000000) + (ST_Area($1::Geography) / 1000000) FROM observatory.%I numer WHERE numer.%I = ANY ((SELECT ARRAY_AGG(geom_ref) FROM _geom)::TEXT[])', - geom, geom_colname, geom_colname, - geom_geomref_colname, geom_tablename, - geom, geom_colname, - geom, geom_colname, geom_colname, - numer_colname, numer_geomref_colname, - geom, numer_tablename, - numer_geomref_colname); + geom_colname, geom_colname, geom_geomref_colname, geom_tablename, + geom_colname, geom_geomref_colname, numer_colname, + numer_geomref_colname, numer_tablename, numer_geomref_colname); ELSIF map_type = 'denominated' THEN - sql = format('WITH _geom AS (SELECT ST_Area(ST_Intersection(%L, geom.%I)) - / ST_Area(geom.%I) overlap, geom.%I geom_ref - FROM observatory.%I geom - WHERE ST_Intersects(%L, geom.%I) - AND ST_Area(ST_Intersection(%L, geom.%I)) / ST_Area(geom.%I) > 0), + sql = format('WITH _subdivided AS ( + SELECT ST_Subdivide($1) AS geom + ), _geom AS (SELECT SUM(ST_Area(ST_Intersection(s.geom, geom.%I))) + / ST_Area(cdb_observatory.FIRST(geom.%I)) overlap, geom.%I geom_ref + FROM observatory.%I geom, _subdivided s + WHERE ST_Intersects(s.geom, geom.%I) + GROUP BY geom.%I), _denom AS (SELECT denom.%I, denom.%I geom_ref FROM observatory.%I denom WHERE denom.%I = ANY ((SELECT ARRAY_AGG(geom_ref) FROM _geom)::TEXT[])) SELECT SUM(numer.%I * (SELECT _geom.overlap FROM _geom WHERE _geom.geom_ref = numer.%I)) / - SUM((SELECT _denom.%I * (SELECT _geom.overlap + NullIf(SUM((SELECT _denom.%I * (SELECT _geom.overlap FROM _geom WHERE _geom.geom_ref = _denom.geom_ref) - FROM _denom WHERE _denom.geom_ref = numer.%I)) + FROM _denom WHERE _denom.geom_ref = numer.%I)), 0) FROM observatory.%I numer WHERE numer.%I = ANY ((SELECT ARRAY_AGG(geom_ref) FROM _geom)::TEXT[])', - geom, geom_colname, - geom_colname, geom_geomref_colname, - geom_tablename, - geom, geom_colname, - geom, geom_colname, geom_colname, - denom_colname, denom_geomref_colname, - denom_tablename, - denom_geomref_colname, - numer_colname, numer_geomref_colname, - denom_colname, - numer_geomref_colname, - numer_tablename, - numer_geomref_colname); + geom_colname, geom_colname, geom_geomref_colname, + geom_tablename, geom_colname, geom_geomref_colname, + denom_colname, denom_geomref_colname, denom_tablename, + denom_geomref_colname, numer_colname, numer_geomref_colname, + denom_colname, numer_geomref_colname, + numer_tablename, numer_geomref_colname); ELSIF map_type = 'predenominated' THEN IF numer_aggregate NOT ILIKE 'sum' THEN RAISE EXCEPTION 'Cannot calculate "%" (%) for custom area as it cannot be summed, use ST_PointOnSurface instead', numer_name, measure_id; ELSE - sql = format('WITH _geom AS (SELECT ST_Area(ST_Intersection(%L, geom.%I)) - / ST_Area(geom.%I) overlap, geom.%I geom_ref - FROM observatory.%I geom - WHERE ST_Intersects(%L, geom.%I) - AND ST_Area(ST_Intersection(%L, geom.%I)) / ST_Area(geom.%I) > 0) + sql = format('WITH _subdivided AS ( + SELECT ST_Subdivide($1) AS geom + ), _geom AS (SELECT SUM(ST_Area(ST_Intersection(s.geom, geom.%I))) + / ST_Area(cdb_observatory.FIRST(geom.%I)) overlap, + geom.%I geom_ref + FROM observatory.%I geom, _subdivided s + WHERE ST_Intersects(s.geom, geom.%I) + GROUP BY geom.%I + ) SELECT SUM(numer.%I * (SELECT _geom.overlap FROM _geom WHERE _geom.geom_ref = numer.%I)) FROM observatory.%I numer WHERE numer.%I = ANY ((SELECT ARRAY_AGG(geom_ref) FROM _geom)::TEXT[])', - geom, geom_colname, geom_colname, - geom_geomref_colname, geom_tablename, - geom, geom_colname, - geom, geom_colname, geom_colname, - numer_colname, numer_geomref_colname, - numer_tablename, + geom_colname, geom_colname, geom_geomref_colname, + geom_tablename, geom_colname, geom_geomref_colname, + numer_colname, numer_geomref_colname, numer_tablename, numer_geomref_colname); END IF; END IF; END IF; - EXECUTE sql INTO result; + EXECUTE sql INTO result USING geom; RETURN result; END; @@ -574,11 +573,11 @@ BEGIN EXECUTE format( 'SELECT %I FROM observatory.%I data - WHERE data.%I = %L', + WHERE data.%I = $1', colname, target_table, - data_geoid_colname, geom_ref) - INTO measure_val; + data_geoid_colname) + INTO measure_val USING geom_ref; RETURN measure_val; @@ -627,28 +626,27 @@ BEGIN 'SELECT data.%I FROM observatory.%I data, observatory.%I geom WHERE data.%I = geom.%I - AND ST_WITHIN(%L, geom.%I) ', + AND ST_WITHIN($1, geom.%I) ', colname, data_table, geom_table, data_geomref_colname, - geom_geomref_colname, geom, geom_colname) - INTO category_val; + geom_geomref_colname, geom_colname) + INTO category_val USING geom; ELSE -- favor the category with the most area EXECUTE format( 'SELECT data.%I category, SUM(overlap_fraction) category_share FROM observatory.%I data, ( SELECT ST_Area( - ST_Intersection(%L, a.%I) - ) / ST_Area(%L) AS overlap_fraction, a.%I geomref + ST_Intersection($1, a.%I) + ) / ST_Area($1) AS overlap_fraction, a.%I geomref FROM observatory.%I as a - WHERE %L && a.%I) _overlaps + WHERE $1 && a.%I) _overlaps WHERE data.%I = _overlaps.geomref GROUP BY category ORDER BY SUM(overlap_fraction) DESC LIMIT 1', - colname, data_table, - geom, geom_colname, geom, geom_geomref_colname, - geom_table, geom, geom_colname, data_geomref_colname) - INTO category_val, category_share; + colname, data_table, geom_colname, geom_geomref_colname, + geom_table, geom_colname, data_geomref_colname) + INTO category_val, category_share USING geom; END IF; RETURN category_val; @@ -738,10 +736,11 @@ BEGIN -- TODO use a super-column for global pop population_measure_id := 'us.census.acs.B01003001'; - EXECUTE format('SELECT cdb_observatory.OBS_GetMeasure( - %L, %L, %L, %L, %L - ) LIMIT 1', geom, population_measure_id, normalize, boundary_id, time_span) - INTO result; + EXECUTE 'SELECT cdb_observatory.OBS_GetMeasure( + $1, $2, $3, $4, $5 + ) LIMIT 1' + INTO result + USING geom, population_measure_id, normalize, boundary_id, time_span; return result; END; @@ -770,27 +769,25 @@ BEGIN -- we *really* should pass in both geom_table_name and boundary_id -- TODO tablename should not be passed here (use boundary_id) EXECUTE - format('SELECT ct.colname + 'SELECT ct.colname FROM observatory.obs_column_to_column c2c, observatory.obs_column_table ct, observatory.obs_table t WHERE c2c.reltype = ''geom_ref'' AND ct.column_id = c2c.source_id AND ct.table_id = t.id - AND t.tablename = %L' - , (data_table_info)[1]->>'tablename') - INTO data_geoid_colname; + AND t.tablename = $1' + INTO data_geoid_colname USING (data_table_info)[1]->>'tablename'; EXECUTE - format('SELECT ct.colname + 'SELECT ct.colname FROM observatory.obs_column_to_column c2c, observatory.obs_column_table ct, observatory.obs_table t WHERE c2c.reltype = ''geom_ref'' AND ct.column_id = c2c.source_id AND ct.table_id = t.id - AND t.tablename = %L' - , geom_table_name) - INTO geom_geoid_colname; + AND t.tablename = $1' + INTO geom_geoid_colname USING geom_table_name; q_select := format('SELECT %I, ', data_geoid_colname); q_sum := 'SELECT Array['; diff --git a/src/pg/sql/42_observatory_exploration.sql b/src/pg/sql/42_observatory_exploration.sql index 9f0e1d2..2a214f0 100644 --- a/src/pg/sql/42_observatory_exploration.sql +++ b/src/pg/sql/42_observatory_exploration.sql @@ -431,10 +431,7 @@ BEGIN RETURN QUERY EXECUTE format($string$ SELECT - (1 / (abs(numgeoms - $3) - --* (1 / Coalesce(NullIf(notnull_percent, 0), 1)) - --* (1 / Coalesce(NullIf(percentfill, 0), 0.0001)) - ))::Numeric + ((100.0 / (1+abs(log(1 + $3) - log(1 + numgeoms)))) * percentfill)::Numeric AS score, * FROM ( WITH clipped_geom AS ( diff --git a/src/pg/test/sql/42_observatory_exploration_test.sql b/src/pg/test/sql/42_observatory_exploration_test.sql index 5e6bac8..8570383 100644 --- a/src/pg/test/sql/42_observatory_exploration_test.sql +++ b/src/pg/test/sql/42_observatory_exploration_test.sql @@ -352,25 +352,25 @@ AS _obs_getavailablegeometries_foobarbaz_denom_not_in_2010_2014; SELECT ARRAY_AGG(geom_id ORDER BY score DESC) = ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract', - 'us.census.tiger.zcta5', 'us.census.tiger.county'] + 'us.census.tiger.county', 'us.census.tiger.zcta5'] AS _obs_geometryscores_500m_buffer FROM cdb_observatory._OBS_GetGeometryScores( ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 500)::Geometry(Geometry, 4326), ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract', - 'us.census.tiger.zcta5', 'us.census.tiger.county']); + 'us.census.tiger.county', 'us.census.tiger.zcta5']); SELECT ARRAY_AGG(geom_id ORDER BY score DESC) = ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract', - 'us.census.tiger.zcta5', 'us.census.tiger.county'] + 'us.census.tiger.county', 'us.census.tiger.zcta5'] AS _obs_geometryscores_5km_buffer FROM cdb_observatory._OBS_GetGeometryScores( ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 5000)::Geometry(Geometry, 4326), ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract', - 'us.census.tiger.zcta5', 'us.census.tiger.county']); + 'us.census.tiger.county', 'us.census.tiger.zcta5']); SELECT ARRAY_AGG(geom_id ORDER BY score DESC) = - ARRAY['us.census.tiger.census_tract', 'us.census.tiger.zcta5', - 'us.census.tiger.county', 'us.census.tiger.block_group'] + ARRAY['us.census.tiger.census_tract', 'us.census.tiger.block_group', + 'us.census.tiger.zcta5', 'us.census.tiger.county'] AS _obs_geometryscores_50km_buffer FROM cdb_observatory._OBS_GetGeometryScores( ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 50000)::Geometry(Geometry, 4326), @@ -378,8 +378,8 @@ SELECT ARRAY_AGG(geom_id ORDER BY score DESC) = 'us.census.tiger.zcta5', 'us.census.tiger.county']); SELECT ARRAY_AGG(geom_id ORDER BY score DESC) = - ARRAY[ 'us.census.tiger.county', 'us.census.tiger.zcta5', - 'us.census.tiger.census_tract', 'us.census.tiger.block_group'] + ARRAY[ 'us.census.tiger.zcta5', 'us.census.tiger.census_tract', + 'us.census.tiger.county', 'us.census.tiger.block_group'] AS _obs_geometryscores_500km_buffer FROM cdb_observatory._OBS_GetGeometryScores( ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 500000)::Geometry(Geometry, 4326), @@ -436,8 +436,8 @@ SELECT JSON_Object_Agg(geom_id, numgeoms::int ORDER BY numgeoms DESC)::Text = 'us.census.tiger.zcta5', 'us.census.tiger.county']); SELECT ARRAY_AGG(geom_id ORDER BY score DESC) = - ARRAY['us.census.tiger.county', 'us.census.tiger.zcta5', - 'us.census.tiger.census_tract', 'us.census.tiger.block_group'] + ARRAY['us.census.tiger.county', 'us.census.tiger.census_tract', + 'us.census.tiger.zcta5', 'us.census.tiger.block_group'] AS _obs_geometryscores_500km_buffer_50_geoms FROM cdb_observatory._OBS_GetGeometryScores( ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 50000)::Geometry(Geometry, 4326), @@ -445,8 +445,8 @@ SELECT ARRAY_AGG(geom_id ORDER BY score DESC) = 'us.census.tiger.zcta5', 'us.census.tiger.county'], 50); SELECT ARRAY_AGG(geom_id ORDER BY score DESC) - = ARRAY['us.census.tiger.zcta5', 'us.census.tiger.county', - 'us.census.tiger.census_tract', 'us.census.tiger.block_group'] + = ARRAY['us.census.tiger.zcta5', 'us.census.tiger.census_tract', + 'us.census.tiger.block_group', 'us.census.tiger.county'] AS _obs_geometryscores_500km_buffer_500_geoms FROM cdb_observatory._OBS_GetGeometryScores( ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 50000)::Geometry(Geometry, 4326), @@ -454,8 +454,8 @@ SELECT ARRAY_AGG(geom_id ORDER BY score DESC) 'us.census.tiger.zcta5', 'us.census.tiger.county'], 500); SELECT ARRAY_AGG(geom_id ORDER BY score DESC) = - ARRAY['us.census.tiger.census_tract', 'us.census.tiger.zcta5', - 'us.census.tiger.county', 'us.census.tiger.block_group'] + ARRAY['us.census.tiger.census_tract', 'us.census.tiger.block_group', + 'us.census.tiger.zcta5', 'us.census.tiger.county'] AS _obs_geometryscores_500km_buffer_2500_geoms FROM cdb_observatory._OBS_GetGeometryScores( ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 50000)::Geometry(Geometry, 4326), diff --git a/src/python/test/autotest.py b/src/python/test/autotest.py index c84ea5e..897eba0 100644 --- a/src/python/test/autotest.py +++ b/src/python/test/autotest.py @@ -34,6 +34,7 @@ AND 'us.census.acs.acs' = ANY (subsection_tags) AND numer_weight > 0 ''').fetchall() + SKIP_COLUMNS = set([ u'mx.inegi_columns.INDI18', u'mx.inegi_columns.ECO40', @@ -61,6 +62,10 @@ SKIP_COLUMNS = set([ u'mx.inegi_columns.POB33', u'mx.inegi_columns.POB58', u'mx.inegi_columns.DISC4', + u'mx.inegi_columns.VIV41', + u'mx.inegi_columns.VIV40', + u'mx.inegi_columns.VIV17', + u'mx.inegi_columns.EDU10' ]) #def default_geometry_id(column_id): @@ -146,7 +151,8 @@ def default_lonlat(column_id): return (40.7, -73.9) elif column_id.startswith('eu.'): raise SkipTest('No tests for Eurostat!') - #return (52.52207036136366, 13.40606689453125) + elif column_id.startswith('br.'): + return (-23.53, -46.63) else: raise Exception('No catalog point set for {}'.format(column_id)) diff --git a/src/python/test/perftest.py b/src/python/test/perftest.py index 660767a..45b1de9 100644 --- a/src/python/test/perftest.py +++ b/src/python/test/perftest.py @@ -5,17 +5,47 @@ from util import query, commit from time import time +import json +import os + USE_SCHEMA = True for q in ( - 'DROP TABLE IF EXISTS obs_censustest', - '''CREATE TABLE obs_censustest (cartodb_id SERIAL PRIMARY KEY, - the_geom GEOMETRY, name TEXT, measure NUMERIC, category TEXT)''', - '''INSERT INTO obs_censustest (the_geom, name) - SELECT * FROM {schema}OBS_GetBoundariesByGeometry( - st_makeenvelope(-74.05437469482422,40.66319159533881, - -73.81885528564453,40.745696344339564, 4326), - 'us.census.tiger.block_group_clipped') As m(the_geom, geoid)''' + 'DROP TABLE IF EXISTS obs_perftest_simple', + '''CREATE TABLE obs_perftest_simple (cartodb_id SERIAL PRIMARY KEY, + point GEOMETRY, + geom GEOMETRY, + offset_geom GEOMETRY, + name TEXT, measure NUMERIC, category TEXT)''', + '''INSERT INTO obs_perftest_simple (point, geom, offset_geom, name) + SELECT ST_PointOnSurface(the_geom) point, + the_geom geom, + ST_Translate(the_geom, -0.1, 0.1) offset_geom, + geom_refs AS name + FROM (SELECT * FROM {schema}OBS_GetBoundariesByGeometry( + st_makeenvelope(-74.05437469482422,40.66319159533881, + -73.81885528564453,40.745696344339564, 4326), + 'us.census.tiger.census_tract_clipped')) foo + ORDER BY ST_NPoints(the_geom) ASC + LIMIT 500''', + 'DROP TABLE IF EXISTS obs_perftest_complex', + '''CREATE TABLE obs_perftest_complex (cartodb_id SERIAL PRIMARY KEY, + point GEOMETRY, + geom GEOMETRY, + offset_geom GEOMETRY, + name TEXT, measure NUMERIC, category TEXT)''', + '''INSERT INTO obs_perftest_complex (point, geom, offset_geom, name) + SELECT ST_PointOnSurface(the_geom) point, + the_geom geom, + ST_Translate(the_geom, -0.1, 0.1) offset_geom, + geom_refs AS name + FROM (SELECT * FROM {schema}OBS_GetBoundariesByGeometry( + st_makeenvelope(-75.05437469482422,40.66319159533881, + -73.81885528564453,41.745696344339564, 4326), + 'us.census.tiger.county_clipped')) foo + ORDER BY ST_NPoints(the_geom) DESC + LIMIT 50;''', + #'''SET statement_timeout = 5000;''' ): query(q.format( schema='cdb_observatory.' if USE_SCHEMA else '', @@ -24,39 +54,90 @@ for q in ( ARGS = { - 'OBS_GetMeasureByID': "name, 'us.census.acs.B01001002', '{}'", - 'OBS_GetMeasure': "{}, 'us.census.acs.B01001002'", - 'OBS_GetCategory': "{}, 'us.census.spielman_singleton_segments.X10'", + ('OBS_GetMeasureByID', None): "name, 'us.census.acs.B01001002', '{}'", + ('OBS_GetMeasure', 'predenominated'): "{}, 'us.census.acs.B01003001'", + ('OBS_GetMeasure', 'area'): "{}, 'us.census.acs.B01001002', 'area'", + ('OBS_GetMeasure', 'denominator'): "{}, 'us.census.acs.B01001002', 'denominator'", + ('OBS_GetCategory', None): "{}, 'us.census.spielman_singleton_segments.X10'", } -GEOMS = { - 'point': 'ST_PointOnSurface(the_geom)', - 'polygon_match': 'the_geom', - 'polygon_buffered': 'ST_Buffer(the_geom::GEOGRAPHY, 1000)::GEOMETRY(GEOMETRY, 4326)', -} + +def record(params, results): + sha = os.environ['OBS_EXTENSION_SHA'] + fpath = os.path.join(os.environ['OBS_PERFTEST_DIR'], sha + '.json') + if os.path.isfile(fpath): + tests = json.load(open(fpath, 'r')) + else: + tests = {} + with open(fpath, 'w') as fhandle: + tests[json.dumps(params)] = { + 'params': params, + 'results': results + } + json.dump(tests, fhandle) @parameterized([ - ('OBS_GetMeasureByID', 'us.census.tiger.block_group_clipped'), - ('OBS_GetMeasureByID', 'us.census.tiger.county'), - ('OBS_GetMeasure', GEOMS['point']), - ('OBS_GetMeasure', GEOMS['polygon_match']), - ('OBS_GetMeasure', GEOMS['polygon_buffered']), - ('OBS_GetCategory', GEOMS['point']), - ('OBS_GetCategory', GEOMS['polygon_match']), - ('OBS_GetCategory', GEOMS['polygon_buffered']), + ('simple', 'OBS_GetMeasureByID', None, 'us.census.tiger.census_tract'), + ('complex', 'OBS_GetMeasureByID', None, 'us.census.tiger.county'), + + ('simple', 'OBS_GetMeasure', 'predenominated', 'point'), + ('simple', 'OBS_GetMeasure', 'predenominated', 'geom'), + ('simple', 'OBS_GetMeasure', 'predenominated', 'offset_geom'), + ('simple', 'OBS_GetMeasure', 'area', 'point'), + ('simple', 'OBS_GetMeasure', 'area', 'geom'), + ('simple', 'OBS_GetMeasure', 'area', 'offset_geom'), + ('simple', 'OBS_GetMeasure', 'denominator', 'point'), + ('simple', 'OBS_GetMeasure', 'denominator', 'geom'), + ('simple', 'OBS_GetMeasure', 'denominator', 'offset_geom'), + ('simple', 'OBS_GetCategory', None, 'point'), + ('simple', 'OBS_GetCategory', None, 'geom'), + ('simple', 'OBS_GetCategory', None, 'offset_geom'), + + ('complex', 'OBS_GetMeasure', 'predenominated', 'point'), + ('complex', 'OBS_GetMeasure', 'predenominated', 'geom'), + ('complex', 'OBS_GetMeasure', 'predenominated', 'offset_geom'), + ('complex', 'OBS_GetMeasure', 'area', 'point'), + ('complex', 'OBS_GetMeasure', 'area', 'geom'), + ('complex', 'OBS_GetMeasure', 'area', 'offset_geom'), + ('complex', 'OBS_GetMeasure', 'denominator', 'point'), + ('complex', 'OBS_GetMeasure', 'denominator', 'geom'), + ('complex', 'OBS_GetMeasure', 'denominator', 'offset_geom'), + ('complex', 'OBS_GetCategory', None, 'point'), + ('complex', 'OBS_GetCategory', None, 'geom'), + ('complex', 'OBS_GetCategory', None, 'offset_geom'), ]) -def test_performance(api_method, arg): - print api_method, arg +def test_performance(geom_complexity, api_method, normalization, geom): + print api_method, geom_complexity, normalization, geom col = 'measure' if 'measure' in api_method.lower() else 'category' - for rows in (1, 10, 50, 100): - q = 'UPDATE obs_censustest SET {col} = {schema}{api_method}({args}) WHERE cartodb_id < {n}'.format( - col=col, - schema='cdb_observatory.' if USE_SCHEMA else '', - api_method=api_method, - args=ARGS[api_method].format(arg), - n=rows+1) + results = [] + + rownums = (1, 5, 10, ) if geom_complexity == 'complex' else (5, 25, 50 ) + for rows in rownums: + stmt = '''UPDATE obs_perftest_{complexity} + SET {col} = {schema}{api_method}({args}) + WHERE cartodb_id < {n}'''.format( + col=col, + complexity=geom_complexity, + schema='cdb_observatory.' if USE_SCHEMA else '', + api_method=api_method, + args=ARGS[api_method, normalization].format(geom), + n=rows+1) start = time() - query(q) + query(stmt) end = time() - print rows, ': ', (rows / (end - start)), ' QPS' + qps = (rows / (end - start)) + results.append({ + 'rows': rows, + 'qps': qps, + 'stmt': stmt + }) + print rows, ': ', qps, ' QPS' + + if 'OBS_RECORD_TEST' in os.environ: + record({ + 'geom_complexity': geom_complexity, + 'api_method': api_method, + 'normalization': normalization, + 'geom': geom + }, results)