From 81c8fc316bcbe6fb8c4eaeb5fb0ff3365d7908cd Mon Sep 17 00:00:00 2001 From: John Krauss Date: Wed, 30 Nov 2016 16:53:22 +0000 Subject: [PATCH 01/14] remove almost all %L formats, including all where geoms were dropped in --- src/pg/sql/41_observatory_augmentation.sql | 153 ++++++++++----------- 1 file changed, 69 insertions(+), 84 deletions(-) diff --git a/src/pg/sql/41_observatory_augmentation.sql b/src/pg/sql/41_observatory_augmentation.sql index 05981f2..f9c3e5c 100644 --- a/src/pg/sql/41_observatory_augmentation.sql +++ b/src/pg/sql/41_observatory_augmentation.sql @@ -232,27 +232,25 @@ BEGIN -- we *really* should pass in both geom_table_name and boundary_id -- TODO tablename should not be passed here (use boundary_id) EXECUTE - format('SELECT ct.colname + 'SELECT ct.colname FROM observatory.obs_column_to_column c2c, observatory.obs_column_table ct, observatory.obs_table t WHERE c2c.reltype = ''geom_ref'' AND ct.column_id = c2c.source_id AND ct.table_id = t.id - AND t.tablename = %L' - , (data_table_info)[1]->>'tablename') - INTO data_geoid_colname; + AND t.tablename = $1' + INTO data_geoid_colname USING (data_table_info)[1]->>'tablename'; EXECUTE - format('SELECT ct.colname + 'SELECT ct.colname FROM observatory.obs_column_to_column c2c, observatory.obs_column_table ct, observatory.obs_table t WHERE c2c.reltype = ''geom_ref'' AND ct.column_id = c2c.source_id AND ct.table_id = t.id - AND t.tablename = %L' - , geom_table_name) - INTO geom_geoid_colname; + AND t.tablename = $1' + INTO geom_geoid_colname USING geom_table_name; EXECUTE format('SELECT %I @@ -268,11 +266,10 @@ BEGIN EXECUTE format('SELECT ST_Area(the_geom::geography) / (1000 * 1000) FROM observatory.%I - WHERE %I = %L', + WHERE %I = $1', geom_table_name, - geom_geoid_colname, - geoid) - INTO area; + geom_geoid_colname) + INTO area USING geoid; IF area IS NULL THEN @@ -435,55 +432,55 @@ BEGIN IF map_type = 'areaNormalized' THEN sql = format('WITH _geom AS (SELECT ST_Area(geom.%I::Geography) / 1000000 area, geom.%I geom_ref FROM observatory.%I geom - WHERE ST_Within(%L, geom.%I) + WHERE ST_Within($1, geom.%I) LIMIT 1) SELECT numer.%I / (SELECT area FROM _geom) FROM observatory.%I numer WHERE numer.%I = (SELECT geom_ref FROM _geom)', geom_colname, geom_geomref_colname, geom_tablename, - geom, geom_colname, numer_colname, numer_tablename, + geom_colname, numer_colname, numer_tablename, numer_geomref_colname); ELSIF map_type = 'denominated' THEN sql = format('SELECT numer.%I / NULLIF((SELECT denom.%I FROM observatory.%I denom WHERE denom.%I = numer.%I LIMIT 1), 0) FROM observatory.%I numer - WHERE numer.%I = (SELECT geom.%I FROM observatory.%I geom WHERE ST_Within(%L, geom.%I) LIMIT 1)', + WHERE numer.%I = + (SELECT geom.%I + FROM observatory.%I geom + WHERE ST_Within($1, geom.%I) LIMIT 1)', numer_colname, denom_colname, denom_tablename, denom_geomref_colname, numer_geomref_colname, - numer_tablename, - numer_geomref_colname, geom_geomref_colname, - geom_tablename, geom, geom_colname); + numer_tablename, numer_geomref_colname, + geom_geomref_colname, geom_tablename, geom_colname); ELSIF map_type = 'predenominated' THEN sql = format('SELECT numer.%I FROM observatory.%I numer - WHERE numer.%I = (SELECT geom.%I FROM observatory.%I geom WHERE ST_Within(%L, geom.%I) LIMIT 1)', - numer_colname, numer_tablename, - numer_geomref_colname, geom_geomref_colname, geom_tablename, - geom, geom_colname); + WHERE numer.%I = + (SELECT geom.%I + FROM observatory.%I geom + WHERE ST_Within($1, geom.%I) LIMIT 1)', + numer_colname, numer_tablename, numer_geomref_colname, + geom_geomref_colname, geom_tablename, geom_colname); END IF; ELSIF geom_type = 'polygon' THEN IF map_type = 'areaNormalized' THEN - sql = format('WITH _geom AS (SELECT ST_Area(ST_Intersection(%L, geom.%I)) + sql = format('WITH _geom AS (SELECT ST_Area(ST_Intersection($1, geom.%I)) / ST_Area(geom.%I) overlap, geom.%I geom_ref FROM observatory.%I geom - WHERE ST_Intersects(%L, geom.%I) - AND ST_Area(ST_Intersection(%L, geom.%I)) / ST_Area(geom.%I) > 0) + WHERE ST_Intersects($1, geom.%I) + AND ST_Area(ST_Intersection($1, geom.%I)) / ST_Area(geom.%I) > 0) SELECT SUM(numer.%I * (SELECT _geom.overlap FROM _geom WHERE _geom.geom_ref = numer.%I)) / - (ST_Area(%L::Geography) / 1000000) + (ST_Area($1::Geography) / 1000000) FROM observatory.%I numer WHERE numer.%I = ANY ((SELECT ARRAY_AGG(geom_ref) FROM _geom)::TEXT[])', - geom, geom_colname, geom_colname, - geom_geomref_colname, geom_tablename, - geom, geom_colname, - geom, geom_colname, geom_colname, - numer_colname, numer_geomref_colname, - geom, numer_tablename, - numer_geomref_colname); + geom_colname, geom_colname, geom_geomref_colname, geom_tablename, + geom_colname, geom_colname, geom_colname, numer_colname, + numer_geomref_colname, numer_tablename, numer_geomref_colname); ELSIF map_type = 'denominated' THEN - sql = format('WITH _geom AS (SELECT ST_Area(ST_Intersection(%L, geom.%I)) + sql = format('WITH _geom AS (SELECT ST_Area(ST_Intersection($1, geom.%I)) / ST_Area(geom.%I) overlap, geom.%I geom_ref FROM observatory.%I geom - WHERE ST_Intersects(%L, geom.%I) - AND ST_Area(ST_Intersection(%L, geom.%I)) / ST_Area(geom.%I) > 0), + WHERE ST_Intersects($1, geom.%I) + AND ST_Area(ST_Intersection($1, geom.%I)) / ST_Area(geom.%I) > 0), _denom AS (SELECT denom.%I, denom.%I geom_ref FROM observatory.%I denom WHERE denom.%I = ANY ((SELECT ARRAY_AGG(geom_ref) FROM _geom)::TEXT[])) @@ -494,44 +491,34 @@ BEGIN FROM _denom WHERE _denom.geom_ref = numer.%I)) FROM observatory.%I numer WHERE numer.%I = ANY ((SELECT ARRAY_AGG(geom_ref) FROM _geom)::TEXT[])', - geom, geom_colname, - geom_colname, geom_geomref_colname, - geom_tablename, - geom, geom_colname, - geom, geom_colname, geom_colname, - denom_colname, denom_geomref_colname, - denom_tablename, - denom_geomref_colname, - numer_colname, numer_geomref_colname, - denom_colname, - numer_geomref_colname, - numer_tablename, - numer_geomref_colname); + geom_colname, geom_colname, geom_geomref_colname, + geom_tablename, geom_colname, geom_colname, geom_colname, + denom_colname, denom_geomref_colname, denom_tablename, + denom_geomref_colname, numer_colname, numer_geomref_colname, + denom_colname, numer_geomref_colname, + numer_tablename, numer_geomref_colname); ELSIF map_type = 'predenominated' THEN IF numer_aggregate NOT ILIKE 'sum' THEN RAISE EXCEPTION 'Cannot calculate "%" (%) for custom area as it cannot be summed, use ST_PointOnSurface instead', numer_name, measure_id; ELSE - sql = format('WITH _geom AS (SELECT ST_Area(ST_Intersection(%L, geom.%I)) + sql = format('WITH _geom AS (SELECT ST_Area(ST_Intersection($1, geom.%I)) / ST_Area(geom.%I) overlap, geom.%I geom_ref FROM observatory.%I geom - WHERE ST_Intersects(%L, geom.%I) - AND ST_Area(ST_Intersection(%L, geom.%I)) / ST_Area(geom.%I) > 0) + WHERE ST_Intersects($1, geom.%I) + AND ST_Area(ST_Intersection($1, geom.%I)) / ST_Area(geom.%I) > 0) SELECT SUM(numer.%I * (SELECT _geom.overlap FROM _geom WHERE _geom.geom_ref = numer.%I)) FROM observatory.%I numer WHERE numer.%I = ANY ((SELECT ARRAY_AGG(geom_ref) FROM _geom)::TEXT[])', - geom, geom_colname, geom_colname, - geom_geomref_colname, geom_tablename, - geom, geom_colname, - geom, geom_colname, geom_colname, - numer_colname, numer_geomref_colname, - numer_tablename, + geom_colname, geom_colname, geom_geomref_colname, + geom_tablename, geom_colname, geom_colname, geom_colname, + numer_colname, numer_geomref_colname, numer_tablename, numer_geomref_colname); END IF; END IF; END IF; - EXECUTE sql INTO result; + EXECUTE sql INTO result USING geom; RETURN result; END; @@ -574,11 +561,11 @@ BEGIN EXECUTE format( 'SELECT %I FROM observatory.%I data - WHERE data.%I = %L', + WHERE data.%I = $1', colname, target_table, - data_geoid_colname, geom_ref) - INTO measure_val; + data_geoid_colname) + INTO measure_val USING geom_ref; RETURN measure_val; @@ -627,28 +614,27 @@ BEGIN 'SELECT data.%I FROM observatory.%I data, observatory.%I geom WHERE data.%I = geom.%I - AND ST_WITHIN(%L, geom.%I) ', + AND ST_WITHIN($1, geom.%I) ', colname, data_table, geom_table, data_geomref_colname, - geom_geomref_colname, geom, geom_colname) - INTO category_val; + geom_geomref_colname, geom_colname) + INTO category_val USING geom; ELSE -- favor the category with the most area EXECUTE format( 'SELECT data.%I category, SUM(overlap_fraction) category_share FROM observatory.%I data, ( SELECT ST_Area( - ST_Intersection(%L, a.%I) - ) / ST_Area(%L) AS overlap_fraction, a.%I geomref + ST_Intersection($1, a.%I) + ) / ST_Area($1) AS overlap_fraction, a.%I geomref FROM observatory.%I as a - WHERE %L && a.%I) _overlaps + WHERE $1 && a.%I) _overlaps WHERE data.%I = _overlaps.geomref GROUP BY category ORDER BY SUM(overlap_fraction) DESC LIMIT 1', - colname, data_table, - geom, geom_colname, geom, geom_geomref_colname, - geom_table, geom, geom_colname, data_geomref_colname) - INTO category_val, category_share; + colname, data_table, geom_colname, geom_geomref_colname, + geom_table, geom_colname, data_geomref_colname) + INTO category_val, category_share USING geom; END IF; RETURN category_val; @@ -738,10 +724,11 @@ BEGIN -- TODO use a super-column for global pop population_measure_id := 'us.census.acs.B01003001'; - EXECUTE format('SELECT cdb_observatory.OBS_GetMeasure( - %L, %L, %L, %L, %L - ) LIMIT 1', geom, population_measure_id, normalize, boundary_id, time_span) - INTO result; + EXECUTE 'SELECT cdb_observatory.OBS_GetMeasure( + $1, $2, $3, $4, $5 + ) LIMIT 1' + INTO result + USING geom, population_measure_id, normalize, boundary_id, time_span; return result; END; @@ -770,27 +757,25 @@ BEGIN -- we *really* should pass in both geom_table_name and boundary_id -- TODO tablename should not be passed here (use boundary_id) EXECUTE - format('SELECT ct.colname + 'SELECT ct.colname FROM observatory.obs_column_to_column c2c, observatory.obs_column_table ct, observatory.obs_table t WHERE c2c.reltype = ''geom_ref'' AND ct.column_id = c2c.source_id AND ct.table_id = t.id - AND t.tablename = %L' - , (data_table_info)[1]->>'tablename') - INTO data_geoid_colname; + AND t.tablename = $1' + INTO data_geoid_colname USING (data_table_info)[1]->>'tablename'; EXECUTE - format('SELECT ct.colname + 'SELECT ct.colname FROM observatory.obs_column_to_column c2c, observatory.obs_column_table ct, observatory.obs_table t WHERE c2c.reltype = ''geom_ref'' AND ct.column_id = c2c.source_id AND ct.table_id = t.id - AND t.tablename = %L' - , geom_table_name) - INTO geom_geoid_colname; + AND t.tablename = $1' + INTO geom_geoid_colname USING geom_table_name; q_select := format('SELECT %I, ', data_geoid_colname); q_sum := 'SELECT Array['; From f32cc60d61fd7c3b292e7b3fd9ba6e7a60c88888 Mon Sep 17 00:00:00 2001 From: John Krauss Date: Wed, 30 Nov 2016 17:15:39 +0000 Subject: [PATCH 02/14] remove redundant area check --- src/pg/sql/41_observatory_augmentation.sql | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/pg/sql/41_observatory_augmentation.sql b/src/pg/sql/41_observatory_augmentation.sql index f9c3e5c..89ca7b3 100644 --- a/src/pg/sql/41_observatory_augmentation.sql +++ b/src/pg/sql/41_observatory_augmentation.sql @@ -505,13 +505,12 @@ BEGIN sql = format('WITH _geom AS (SELECT ST_Area(ST_Intersection($1, geom.%I)) / ST_Area(geom.%I) overlap, geom.%I geom_ref FROM observatory.%I geom - WHERE ST_Intersects($1, geom.%I) - AND ST_Area(ST_Intersection($1, geom.%I)) / ST_Area(geom.%I) > 0) + WHERE ST_Intersects($1, geom.%I)) SELECT SUM(numer.%I * (SELECT _geom.overlap FROM _geom WHERE _geom.geom_ref = numer.%I)) FROM observatory.%I numer WHERE numer.%I = ANY ((SELECT ARRAY_AGG(geom_ref) FROM _geom)::TEXT[])', geom_colname, geom_colname, geom_geomref_colname, - geom_tablename, geom_colname, geom_colname, geom_colname, + geom_tablename, geom_colname, numer_colname, numer_geomref_colname, numer_tablename, numer_geomref_colname); END IF; From 34a3aab3235ff292dbcbd6c493aaa2966b1bf137 Mon Sep 17 00:00:00 2001 From: John Krauss Date: Wed, 30 Nov 2016 17:24:45 +0000 Subject: [PATCH 03/14] remove redundant area checks from other polygon-based getmeasure branches --- src/pg/sql/41_observatory_augmentation.sql | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/pg/sql/41_observatory_augmentation.sql b/src/pg/sql/41_observatory_augmentation.sql index 89ca7b3..560663b 100644 --- a/src/pg/sql/41_observatory_augmentation.sql +++ b/src/pg/sql/41_observatory_augmentation.sql @@ -466,21 +466,19 @@ BEGIN sql = format('WITH _geom AS (SELECT ST_Area(ST_Intersection($1, geom.%I)) / ST_Area(geom.%I) overlap, geom.%I geom_ref FROM observatory.%I geom - WHERE ST_Intersects($1, geom.%I) - AND ST_Area(ST_Intersection($1, geom.%I)) / ST_Area(geom.%I) > 0) + WHERE ST_Intersects($1, geom.%I)) SELECT SUM(numer.%I * (SELECT _geom.overlap FROM _geom WHERE _geom.geom_ref = numer.%I)) / (ST_Area($1::Geography) / 1000000) FROM observatory.%I numer WHERE numer.%I = ANY ((SELECT ARRAY_AGG(geom_ref) FROM _geom)::TEXT[])', geom_colname, geom_colname, geom_geomref_colname, geom_tablename, - geom_colname, geom_colname, geom_colname, numer_colname, + geom_colname, numer_colname, numer_geomref_colname, numer_tablename, numer_geomref_colname); ELSIF map_type = 'denominated' THEN sql = format('WITH _geom AS (SELECT ST_Area(ST_Intersection($1, geom.%I)) / ST_Area(geom.%I) overlap, geom.%I geom_ref FROM observatory.%I geom - WHERE ST_Intersects($1, geom.%I) - AND ST_Area(ST_Intersection($1, geom.%I)) / ST_Area(geom.%I) > 0), + WHERE ST_Intersects($1, geom.%I)), _denom AS (SELECT denom.%I, denom.%I geom_ref FROM observatory.%I denom WHERE denom.%I = ANY ((SELECT ARRAY_AGG(geom_ref) FROM _geom)::TEXT[])) @@ -492,7 +490,7 @@ BEGIN FROM observatory.%I numer WHERE numer.%I = ANY ((SELECT ARRAY_AGG(geom_ref) FROM _geom)::TEXT[])', geom_colname, geom_colname, geom_geomref_colname, - geom_tablename, geom_colname, geom_colname, geom_colname, + geom_tablename, geom_colname, denom_colname, denom_geomref_colname, denom_tablename, denom_geomref_colname, numer_colname, numer_geomref_colname, denom_colname, numer_geomref_colname, From ff0f6ea6e0427eec35489a00d377dcdd5622ea64 Mon Sep 17 00:00:00 2001 From: John Krauss Date: Wed, 30 Nov 2016 23:15:30 +0000 Subject: [PATCH 04/14] use st_subdivide to deal with more complex geometries --- src/pg/sql/41_observatory_augmentation.sql | 53 ++++++++++++++-------- 1 file changed, 34 insertions(+), 19 deletions(-) diff --git a/src/pg/sql/41_observatory_augmentation.sql b/src/pg/sql/41_observatory_augmentation.sql index 560663b..ddfda4c 100644 --- a/src/pg/sql/41_observatory_augmentation.sql +++ b/src/pg/sql/41_observatory_augmentation.sql @@ -340,7 +340,8 @@ CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetMeasure( measure_id TEXT, normalize TEXT DEFAULT NULL, boundary_id TEXT DEFAULT NULL, - time_span TEXT DEFAULT NULL + time_span TEXT DEFAULT NULL, + simplification NUMERIC DEFAULT 0.0001 ) RETURNS NUMERIC AS $$ @@ -366,13 +367,16 @@ BEGIN RETURN NULL; END IF; - geom := ST_SnapToGrid(geom, 0.000001); + IF simplification IS NOT NULL THEN + geom := ST_Simplify(geom, simplification); + END IF; IF ST_GeometryType(geom) = 'ST_Point' THEN geom_type := 'point'; ELSIF ST_GeometryType(geom) IN ('ST_Polygon', 'ST_MultiPolygon') THEN geom_type := 'polygon'; - geom := ST_Buffer(geom, 0.000001); + --geom := ST_Buffer(geom, 0.000001); + geom := ST_CollectionExtract(ST_MakeValid(geom), 3); ELSE RAISE EXCEPTION 'Invalid geometry type (%), can only handle ''ST_Point'', ''ST_Polygon'', and ''ST_MultiPolygon''', ST_GeometryType(geom); @@ -404,7 +408,7 @@ BEGIN USING COALESCE(boundary_id, ''), measure_id, COALESCE(time_span, ''), CASE WHEN ST_GeometryType(geom) = 'ST_Point' THEN st_buffer(geom::geography, 10)::geometry(geometry, 4326) - ELSE geom + ELSE ST_Envelope(geom) END; IF geom_id IS NULL THEN @@ -463,22 +467,28 @@ BEGIN END IF; ELSIF geom_type = 'polygon' THEN IF map_type = 'areaNormalized' THEN - sql = format('WITH _geom AS (SELECT ST_Area(ST_Intersection($1, geom.%I)) - / ST_Area(geom.%I) overlap, geom.%I geom_ref - FROM observatory.%I geom - WHERE ST_Intersects($1, geom.%I)) + sql = format('WITH _subdivided AS ( + SELECT ST_Subdivide($1) AS geom + ), _geom AS (SELECT SUM(ST_Area(ST_Intersection(s.geom, geom.%I))) + / ST_Area(cdb_observatory.FIRST(geom.%I)) overlap, geom.%I geom_ref + FROM observatory.%I geom, _subdivided s + WHERE ST_Intersects(s.geom, geom.%I) + GROUP BY geom.%I) SELECT SUM(numer.%I * (SELECT _geom.overlap FROM _geom WHERE _geom.geom_ref = numer.%I)) / (ST_Area($1::Geography) / 1000000) FROM observatory.%I numer WHERE numer.%I = ANY ((SELECT ARRAY_AGG(geom_ref) FROM _geom)::TEXT[])', geom_colname, geom_colname, geom_geomref_colname, geom_tablename, - geom_colname, numer_colname, + geom_colname, geom_geomref_colname, numer_colname, numer_geomref_colname, numer_tablename, numer_geomref_colname); ELSIF map_type = 'denominated' THEN - sql = format('WITH _geom AS (SELECT ST_Area(ST_Intersection($1, geom.%I)) - / ST_Area(geom.%I) overlap, geom.%I geom_ref - FROM observatory.%I geom - WHERE ST_Intersects($1, geom.%I)), + sql = format('WITH _subdivided AS ( + SELECT ST_Subdivide($1) AS geom + ), _geom AS (SELECT SUM(ST_Area(ST_Intersection(s.geom, geom.%I))) + / ST_Area(FIRST(geom.%I)) overlap, geom.%I geom_ref + FROM observatory.%I geom, _subdivided s + WHERE ST_Intersects(s.geom, geom.%I) + GROUP BY geom.%I), _denom AS (SELECT denom.%I, denom.%I geom_ref FROM observatory.%I denom WHERE denom.%I = ANY ((SELECT ARRAY_AGG(geom_ref) FROM _geom)::TEXT[])) @@ -490,7 +500,7 @@ BEGIN FROM observatory.%I numer WHERE numer.%I = ANY ((SELECT ARRAY_AGG(geom_ref) FROM _geom)::TEXT[])', geom_colname, geom_colname, geom_geomref_colname, - geom_tablename, geom_colname, + geom_tablename, geom_colname, geom_geomref_colname, denom_colname, denom_geomref_colname, denom_tablename, denom_geomref_colname, numer_colname, numer_geomref_colname, denom_colname, numer_geomref_colname, @@ -500,15 +510,20 @@ BEGIN RAISE EXCEPTION 'Cannot calculate "%" (%) for custom area as it cannot be summed, use ST_PointOnSurface instead', numer_name, measure_id; ELSE - sql = format('WITH _geom AS (SELECT ST_Area(ST_Intersection($1, geom.%I)) - / ST_Area(geom.%I) overlap, geom.%I geom_ref - FROM observatory.%I geom - WHERE ST_Intersects($1, geom.%I)) + sql = format('WITH _subdivided AS ( + SELECT ST_Subdivide($1) AS geom + ), _geom AS (SELECT SUM(ST_Area(ST_Intersection(s.geom, geom.%I))) + / ST_Area(cdb_observatory.FIRST(geom.%I)) overlap, + geom.%I geom_ref + FROM observatory.%I geom, _subdivided s + WHERE ST_Intersects(s.geom, geom.%I) + GROUP BY geom.%I + ) SELECT SUM(numer.%I * (SELECT _geom.overlap FROM _geom WHERE _geom.geom_ref = numer.%I)) FROM observatory.%I numer WHERE numer.%I = ANY ((SELECT ARRAY_AGG(geom_ref) FROM _geom)::TEXT[])', geom_colname, geom_colname, geom_geomref_colname, - geom_tablename, geom_colname, + geom_tablename, geom_colname, geom_geomref_colname, numer_colname, numer_geomref_colname, numer_tablename, numer_geomref_colname); END IF; From 4ce1648550d6cab587dd7a405bada31c1d1d2feb Mon Sep 17 00:00:00 2001 From: John Krauss Date: Wed, 30 Nov 2016 23:16:18 +0000 Subject: [PATCH 05/14] score rasters with lots of missing space lower --- src/pg/sql/42_observatory_exploration.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pg/sql/42_observatory_exploration.sql b/src/pg/sql/42_observatory_exploration.sql index 9f0e1d2..e99bb18 100644 --- a/src/pg/sql/42_observatory_exploration.sql +++ b/src/pg/sql/42_observatory_exploration.sql @@ -434,7 +434,7 @@ BEGIN (1 / (abs(numgeoms - $3) --* (1 / Coalesce(NullIf(notnull_percent, 0), 1)) --* (1 / Coalesce(NullIf(percentfill, 0), 0.0001)) - ))::Numeric + ))::Numeric * percentfill AS score, * FROM ( WITH clipped_geom AS ( From 44932be1f572599dfb9f709c3530d267889dfd26 Mon Sep 17 00:00:00 2001 From: John Krauss Date: Thu, 1 Dec 2016 21:50:39 +0000 Subject: [PATCH 06/14] improvements to scoring, fixing oversimplification and removing some premature optimization --- src/pg/sql/41_observatory_augmentation.sql | 6 ++-- src/pg/sql/42_observatory_exploration.sql | 5 +--- .../sql/42_observatory_exploration_test.sql | 28 +++++++++---------- 3 files changed, 18 insertions(+), 21 deletions(-) diff --git a/src/pg/sql/41_observatory_augmentation.sql b/src/pg/sql/41_observatory_augmentation.sql index ddfda4c..061bc2a 100644 --- a/src/pg/sql/41_observatory_augmentation.sql +++ b/src/pg/sql/41_observatory_augmentation.sql @@ -341,7 +341,7 @@ CREATE OR REPLACE FUNCTION cdb_observatory.OBS_GetMeasure( normalize TEXT DEFAULT NULL, boundary_id TEXT DEFAULT NULL, time_span TEXT DEFAULT NULL, - simplification NUMERIC DEFAULT 0.0001 + simplification NUMERIC DEFAULT 0.00001 ) RETURNS NUMERIC AS $$ @@ -408,7 +408,7 @@ BEGIN USING COALESCE(boundary_id, ''), measure_id, COALESCE(time_span, ''), CASE WHEN ST_GeometryType(geom) = 'ST_Point' THEN st_buffer(geom::geography, 10)::geometry(geometry, 4326) - ELSE ST_Envelope(geom) + ELSE geom END; IF geom_id IS NULL THEN @@ -485,7 +485,7 @@ BEGIN sql = format('WITH _subdivided AS ( SELECT ST_Subdivide($1) AS geom ), _geom AS (SELECT SUM(ST_Area(ST_Intersection(s.geom, geom.%I))) - / ST_Area(FIRST(geom.%I)) overlap, geom.%I geom_ref + / ST_Area(cdb_observatory.FIRST(geom.%I)) overlap, geom.%I geom_ref FROM observatory.%I geom, _subdivided s WHERE ST_Intersects(s.geom, geom.%I) GROUP BY geom.%I), diff --git a/src/pg/sql/42_observatory_exploration.sql b/src/pg/sql/42_observatory_exploration.sql index e99bb18..2a214f0 100644 --- a/src/pg/sql/42_observatory_exploration.sql +++ b/src/pg/sql/42_observatory_exploration.sql @@ -431,10 +431,7 @@ BEGIN RETURN QUERY EXECUTE format($string$ SELECT - (1 / (abs(numgeoms - $3) - --* (1 / Coalesce(NullIf(notnull_percent, 0), 1)) - --* (1 / Coalesce(NullIf(percentfill, 0), 0.0001)) - ))::Numeric * percentfill + ((100.0 / (1+abs(log(1 + $3) - log(1 + numgeoms)))) * percentfill)::Numeric AS score, * FROM ( WITH clipped_geom AS ( diff --git a/src/pg/test/sql/42_observatory_exploration_test.sql b/src/pg/test/sql/42_observatory_exploration_test.sql index 5e6bac8..8570383 100644 --- a/src/pg/test/sql/42_observatory_exploration_test.sql +++ b/src/pg/test/sql/42_observatory_exploration_test.sql @@ -352,25 +352,25 @@ AS _obs_getavailablegeometries_foobarbaz_denom_not_in_2010_2014; SELECT ARRAY_AGG(geom_id ORDER BY score DESC) = ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract', - 'us.census.tiger.zcta5', 'us.census.tiger.county'] + 'us.census.tiger.county', 'us.census.tiger.zcta5'] AS _obs_geometryscores_500m_buffer FROM cdb_observatory._OBS_GetGeometryScores( ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 500)::Geometry(Geometry, 4326), ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract', - 'us.census.tiger.zcta5', 'us.census.tiger.county']); + 'us.census.tiger.county', 'us.census.tiger.zcta5']); SELECT ARRAY_AGG(geom_id ORDER BY score DESC) = ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract', - 'us.census.tiger.zcta5', 'us.census.tiger.county'] + 'us.census.tiger.county', 'us.census.tiger.zcta5'] AS _obs_geometryscores_5km_buffer FROM cdb_observatory._OBS_GetGeometryScores( ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 5000)::Geometry(Geometry, 4326), ARRAY['us.census.tiger.block_group', 'us.census.tiger.census_tract', - 'us.census.tiger.zcta5', 'us.census.tiger.county']); + 'us.census.tiger.county', 'us.census.tiger.zcta5']); SELECT ARRAY_AGG(geom_id ORDER BY score DESC) = - ARRAY['us.census.tiger.census_tract', 'us.census.tiger.zcta5', - 'us.census.tiger.county', 'us.census.tiger.block_group'] + ARRAY['us.census.tiger.census_tract', 'us.census.tiger.block_group', + 'us.census.tiger.zcta5', 'us.census.tiger.county'] AS _obs_geometryscores_50km_buffer FROM cdb_observatory._OBS_GetGeometryScores( ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 50000)::Geometry(Geometry, 4326), @@ -378,8 +378,8 @@ SELECT ARRAY_AGG(geom_id ORDER BY score DESC) = 'us.census.tiger.zcta5', 'us.census.tiger.county']); SELECT ARRAY_AGG(geom_id ORDER BY score DESC) = - ARRAY[ 'us.census.tiger.county', 'us.census.tiger.zcta5', - 'us.census.tiger.census_tract', 'us.census.tiger.block_group'] + ARRAY[ 'us.census.tiger.zcta5', 'us.census.tiger.census_tract', + 'us.census.tiger.county', 'us.census.tiger.block_group'] AS _obs_geometryscores_500km_buffer FROM cdb_observatory._OBS_GetGeometryScores( ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 500000)::Geometry(Geometry, 4326), @@ -436,8 +436,8 @@ SELECT JSON_Object_Agg(geom_id, numgeoms::int ORDER BY numgeoms DESC)::Text = 'us.census.tiger.zcta5', 'us.census.tiger.county']); SELECT ARRAY_AGG(geom_id ORDER BY score DESC) = - ARRAY['us.census.tiger.county', 'us.census.tiger.zcta5', - 'us.census.tiger.census_tract', 'us.census.tiger.block_group'] + ARRAY['us.census.tiger.county', 'us.census.tiger.census_tract', + 'us.census.tiger.zcta5', 'us.census.tiger.block_group'] AS _obs_geometryscores_500km_buffer_50_geoms FROM cdb_observatory._OBS_GetGeometryScores( ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 50000)::Geometry(Geometry, 4326), @@ -445,8 +445,8 @@ SELECT ARRAY_AGG(geom_id ORDER BY score DESC) = 'us.census.tiger.zcta5', 'us.census.tiger.county'], 50); SELECT ARRAY_AGG(geom_id ORDER BY score DESC) - = ARRAY['us.census.tiger.zcta5', 'us.census.tiger.county', - 'us.census.tiger.census_tract', 'us.census.tiger.block_group'] + = ARRAY['us.census.tiger.zcta5', 'us.census.tiger.census_tract', + 'us.census.tiger.block_group', 'us.census.tiger.county'] AS _obs_geometryscores_500km_buffer_500_geoms FROM cdb_observatory._OBS_GetGeometryScores( ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 50000)::Geometry(Geometry, 4326), @@ -454,8 +454,8 @@ SELECT ARRAY_AGG(geom_id ORDER BY score DESC) 'us.census.tiger.zcta5', 'us.census.tiger.county'], 500); SELECT ARRAY_AGG(geom_id ORDER BY score DESC) = - ARRAY['us.census.tiger.census_tract', 'us.census.tiger.zcta5', - 'us.census.tiger.county', 'us.census.tiger.block_group'] + ARRAY['us.census.tiger.census_tract', 'us.census.tiger.block_group', + 'us.census.tiger.zcta5', 'us.census.tiger.county'] AS _obs_geometryscores_500km_buffer_2500_geoms FROM cdb_observatory._OBS_GetGeometryScores( ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 50000)::Geometry(Geometry, 4326), From 463db99222bd8db7110654e3d9ac04a3f196fae5 Mon Sep 17 00:00:00 2001 From: John Krauss Date: Mon, 5 Dec 2016 18:51:58 +0000 Subject: [PATCH 07/14] add perf tests for different geometry complexities as well as all code branches for getmeasure --- src/python/test/perftest.py | 105 +++++++++++++++++++++++++----------- 1 file changed, 74 insertions(+), 31 deletions(-) diff --git a/src/python/test/perftest.py b/src/python/test/perftest.py index 660767a..71b95c0 100644 --- a/src/python/test/perftest.py +++ b/src/python/test/perftest.py @@ -8,14 +8,41 @@ from time import time USE_SCHEMA = True for q in ( - 'DROP TABLE IF EXISTS obs_censustest', - '''CREATE TABLE obs_censustest (cartodb_id SERIAL PRIMARY KEY, - the_geom GEOMETRY, name TEXT, measure NUMERIC, category TEXT)''', - '''INSERT INTO obs_censustest (the_geom, name) - SELECT * FROM {schema}OBS_GetBoundariesByGeometry( - st_makeenvelope(-74.05437469482422,40.66319159533881, - -73.81885528564453,40.745696344339564, 4326), - 'us.census.tiger.block_group_clipped') As m(the_geom, geoid)''' + 'DROP TABLE IF EXISTS obs_perftest_simple', + '''CREATE TABLE obs_perftest_simple (cartodb_id SERIAL PRIMARY KEY, + point GEOMETRY, + geom GEOMETRY, + offset_geom GEOMETRY, + name TEXT, measure NUMERIC, category TEXT)''', + '''INSERT INTO obs_perftest_simple (point, geom, offset_geom, name) + SELECT ST_PointOnSurface(the_geom) point, + the_geom geom, + ST_Translate(the_geom, -0.1, 0.1) offset_geom, + geom_refs AS name + FROM (SELECT * FROM {schema}OBS_GetBoundariesByGeometry( + st_makeenvelope(-74.05437469482422,40.66319159533881, + -73.81885528564453,40.745696344339564, 4326), + 'us.census.tiger.census_tract_clipped')) foo + ORDER BY ST_NPoints(the_geom) ASC + LIMIT 50''', + 'DROP TABLE IF EXISTS obs_perftest_complex', + '''CREATE TABLE obs_perftest_complex (cartodb_id SERIAL PRIMARY KEY, + point GEOMETRY, + geom GEOMETRY, + offset_geom GEOMETRY, + name TEXT, measure NUMERIC, category TEXT)''', + '''INSERT INTO obs_perftest_complex (point, geom, offset_geom, name) + SELECT ST_PointOnSurface(the_geom) point, + the_geom geom, + ST_Translate(the_geom, -0.1, 0.1) offset_geom, + geom_refs AS name + FROM (SELECT * FROM {schema}OBS_GetBoundariesByGeometry( + st_makeenvelope(-75.05437469482422,40.66319159533881, + -73.81885528564453,41.745696344339564, 4326), + 'us.census.tiger.county_clipped')) foo + ORDER BY ST_NPoints(the_geom) DESC + LIMIT 50;''', + '''SET statement_timeout = 5000;''' ): query(q.format( schema='cdb_observatory.' if USE_SCHEMA else '', @@ -24,37 +51,53 @@ for q in ( ARGS = { - 'OBS_GetMeasureByID': "name, 'us.census.acs.B01001002', '{}'", - 'OBS_GetMeasure': "{}, 'us.census.acs.B01001002'", - 'OBS_GetCategory': "{}, 'us.census.spielman_singleton_segments.X10'", + ('OBS_GetMeasureByID', None): "name, 'us.census.acs.B01001002', '{}'", + ('OBS_GetMeasure', 'predenominated'): "{}, 'us.census.acs.B01003001'", + ('OBS_GetMeasure', 'area'): "{}, 'us.census.acs.B01001002', 'area'", + ('OBS_GetMeasure', 'denominator'): "{}, 'us.census.acs.B01001002', 'denominator'", + ('OBS_GetCategory', None): "{}, 'us.census.spielman_singleton_segments.X10'", } -GEOMS = { - 'point': 'ST_PointOnSurface(the_geom)', - 'polygon_match': 'the_geom', - 'polygon_buffered': 'ST_Buffer(the_geom::GEOGRAPHY, 1000)::GEOMETRY(GEOMETRY, 4326)', -} - - @parameterized([ - ('OBS_GetMeasureByID', 'us.census.tiger.block_group_clipped'), - ('OBS_GetMeasureByID', 'us.census.tiger.county'), - ('OBS_GetMeasure', GEOMS['point']), - ('OBS_GetMeasure', GEOMS['polygon_match']), - ('OBS_GetMeasure', GEOMS['polygon_buffered']), - ('OBS_GetCategory', GEOMS['point']), - ('OBS_GetCategory', GEOMS['polygon_match']), - ('OBS_GetCategory', GEOMS['polygon_buffered']), + ('simple', 'OBS_GetMeasureByID', None, 'us.census.tiger.census_tract'), + ('complex', 'OBS_GetMeasureByID', None, 'us.census.tiger.county'), + + ('simple', 'OBS_GetMeasure', 'predenominated', 'point'), + ('simple', 'OBS_GetMeasure', 'predenominated', 'geom'), + ('simple', 'OBS_GetMeasure', 'predenominated', 'offset_geom'), + ('simple', 'OBS_GetMeasure', 'area', 'point'), + ('simple', 'OBS_GetMeasure', 'area', 'geom'), + ('simple', 'OBS_GetMeasure', 'area', 'offset_geom'), + ('simple', 'OBS_GetMeasure', 'denominator', 'point'), + ('simple', 'OBS_GetMeasure', 'denominator', 'geom'), + ('simple', 'OBS_GetMeasure', 'denominator', 'offset_geom'), + ('simple', 'OBS_GetCategory', None, 'point'), + ('simple', 'OBS_GetCategory', None, 'geom'), + ('simple', 'OBS_GetCategory', None, 'offset_geom'), + + ('complex', 'OBS_GetMeasure', 'predenominated', 'point'), + ('complex', 'OBS_GetMeasure', 'predenominated', 'geom'), + ('complex', 'OBS_GetMeasure', 'predenominated', 'offset_geom'), + ('complex', 'OBS_GetMeasure', 'area', 'point'), + ('complex', 'OBS_GetMeasure', 'area', 'geom'), + ('complex', 'OBS_GetMeasure', 'area', 'offset_geom'), + ('complex', 'OBS_GetMeasure', 'denominator', 'point'), + ('complex', 'OBS_GetMeasure', 'denominator', 'geom'), + ('complex', 'OBS_GetMeasure', 'denominator', 'offset_geom'), + ('complex', 'OBS_GetCategory', None, 'point'), + ('complex', 'OBS_GetCategory', None, 'geom'), + ('complex', 'OBS_GetCategory', None, 'offset_geom'), ]) -def test_performance(api_method, arg): - print api_method, arg +def test_performance(geom_complexity, api_method, normalization, geom): + print api_method, geom_complexity, normalization, geom col = 'measure' if 'measure' in api_method.lower() else 'category' - for rows in (1, 10, 50, 100): - q = 'UPDATE obs_censustest SET {col} = {schema}{api_method}({args}) WHERE cartodb_id < {n}'.format( + for rows in (1, 5, 10, ): + q = 'UPDATE obs_perftest_{complexity} SET {col} = {schema}{api_method}({args}) WHERE cartodb_id < {n}'.format( col=col, + complexity=geom_complexity, schema='cdb_observatory.' if USE_SCHEMA else '', api_method=api_method, - args=ARGS[api_method].format(arg), + args=ARGS[api_method, normalization].format(geom), n=rows+1) start = time() query(q) From 255f8dc18e73973a3152943f14ab1cfd7fd491ee Mon Sep 17 00:00:00 2001 From: John Krauss Date: Mon, 5 Dec 2016 22:55:14 +0000 Subject: [PATCH 08/14] support peristence of test results to JSON --- src/python/test/perftest.py | 79 +++++++++++++++++++++++++++---------- 1 file changed, 58 insertions(+), 21 deletions(-) diff --git a/src/python/test/perftest.py b/src/python/test/perftest.py index 71b95c0..5f2a5b5 100644 --- a/src/python/test/perftest.py +++ b/src/python/test/perftest.py @@ -5,6 +5,9 @@ from util import query, commit from time import time +import json +import os + USE_SCHEMA = True for q in ( @@ -58,6 +61,22 @@ ARGS = { ('OBS_GetCategory', None): "{}, 'us.census.spielman_singleton_segments.X10'", } + +def record(params, results): + sha = os.environ['OBS_EXTENSION_SHA'] + fpath = os.path.join(os.environ['OBS_PERFTEST_DIR'], sha + '.json') + if os.path.isfile(fpath): + tests = json.load(open(fpath, 'r')) + else: + tests = {} + with open(fpath, 'w') as fhandle: + tests[json.dumps(params)] = { + 'params': params, + 'results': results + } + json.dump(tests, fhandle) + + @parameterized([ ('simple', 'OBS_GetMeasureByID', None, 'us.census.tiger.census_tract'), ('complex', 'OBS_GetMeasureByID', None, 'us.census.tiger.county'), @@ -75,31 +94,49 @@ ARGS = { ('simple', 'OBS_GetCategory', None, 'geom'), ('simple', 'OBS_GetCategory', None, 'offset_geom'), - ('complex', 'OBS_GetMeasure', 'predenominated', 'point'), - ('complex', 'OBS_GetMeasure', 'predenominated', 'geom'), - ('complex', 'OBS_GetMeasure', 'predenominated', 'offset_geom'), - ('complex', 'OBS_GetMeasure', 'area', 'point'), - ('complex', 'OBS_GetMeasure', 'area', 'geom'), - ('complex', 'OBS_GetMeasure', 'area', 'offset_geom'), - ('complex', 'OBS_GetMeasure', 'denominator', 'point'), - ('complex', 'OBS_GetMeasure', 'denominator', 'geom'), - ('complex', 'OBS_GetMeasure', 'denominator', 'offset_geom'), - ('complex', 'OBS_GetCategory', None, 'point'), - ('complex', 'OBS_GetCategory', None, 'geom'), - ('complex', 'OBS_GetCategory', None, 'offset_geom'), + #('complex', 'OBS_GetMeasure', 'predenominated', 'point'), + #('complex', 'OBS_GetMeasure', 'predenominated', 'geom'), + #('complex', 'OBS_GetMeasure', 'predenominated', 'offset_geom'), + #('complex', 'OBS_GetMeasure', 'area', 'point'), + #('complex', 'OBS_GetMeasure', 'area', 'geom'), + #('complex', 'OBS_GetMeasure', 'area', 'offset_geom'), + #('complex', 'OBS_GetMeasure', 'denominator', 'point'), + #('complex', 'OBS_GetMeasure', 'denominator', 'geom'), + #('complex', 'OBS_GetMeasure', 'denominator', 'offset_geom'), + #('complex', 'OBS_GetCategory', None, 'point'), + #('complex', 'OBS_GetCategory', None, 'geom'), + #('complex', 'OBS_GetCategory', None, 'offset_geom'), ]) def test_performance(geom_complexity, api_method, normalization, geom): print api_method, geom_complexity, normalization, geom col = 'measure' if 'measure' in api_method.lower() else 'category' + results = [] + for rows in (1, 5, 10, ): - q = 'UPDATE obs_perftest_{complexity} SET {col} = {schema}{api_method}({args}) WHERE cartodb_id < {n}'.format( - col=col, - complexity=geom_complexity, - schema='cdb_observatory.' if USE_SCHEMA else '', - api_method=api_method, - args=ARGS[api_method, normalization].format(geom), - n=rows+1) + stmt = '''UPDATE obs_perftest_{complexity} + SET {col} = {schema}{api_method}({args}) + WHERE cartodb_id < {n}'''.format( + col=col, + complexity=geom_complexity, + schema='cdb_observatory.' if USE_SCHEMA else '', + api_method=api_method, + args=ARGS[api_method, normalization].format(geom), + n=rows+1) start = time() - query(q) + query(stmt) end = time() - print rows, ': ', (rows / (end - start)), ' QPS' + qps = (rows / (end - start)) + results.append({ + 'rows': rows, + 'qps': qps, + 'stmt': stmt + }) + print rows, ': ', qps, ' QPS' + + if 'OBS_RECORD_TEST' in os.environ: + record({ + 'geom_complexity': geom_complexity, + 'api_method': api_method, + 'normalization': normalization, + 'geom': geom + }, results) From b7ee3a6d671d41067b0181ab905b9de9b7c3d605 Mon Sep 17 00:00:00 2001 From: John Krauss Date: Thu, 8 Dec 2016 02:17:38 +0000 Subject: [PATCH 09/14] perftest updates, adding BR test point --- src/python/test/autotest.py | 3 ++- src/python/test/perftest.py | 31 ++++++++++++++++--------------- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/src/python/test/autotest.py b/src/python/test/autotest.py index c84ea5e..c4cb5db 100644 --- a/src/python/test/autotest.py +++ b/src/python/test/autotest.py @@ -146,7 +146,8 @@ def default_lonlat(column_id): return (40.7, -73.9) elif column_id.startswith('eu.'): raise SkipTest('No tests for Eurostat!') - #return (52.52207036136366, 13.40606689453125) + elif column_id.startswith('br.'): + return (-22.9, -43.19) else: raise Exception('No catalog point set for {}'.format(column_id)) diff --git a/src/python/test/perftest.py b/src/python/test/perftest.py index 5f2a5b5..45b1de9 100644 --- a/src/python/test/perftest.py +++ b/src/python/test/perftest.py @@ -27,7 +27,7 @@ for q in ( -73.81885528564453,40.745696344339564, 4326), 'us.census.tiger.census_tract_clipped')) foo ORDER BY ST_NPoints(the_geom) ASC - LIMIT 50''', + LIMIT 500''', 'DROP TABLE IF EXISTS obs_perftest_complex', '''CREATE TABLE obs_perftest_complex (cartodb_id SERIAL PRIMARY KEY, point GEOMETRY, @@ -45,7 +45,7 @@ for q in ( 'us.census.tiger.county_clipped')) foo ORDER BY ST_NPoints(the_geom) DESC LIMIT 50;''', - '''SET statement_timeout = 5000;''' + #'''SET statement_timeout = 5000;''' ): query(q.format( schema='cdb_observatory.' if USE_SCHEMA else '', @@ -94,25 +94,26 @@ def record(params, results): ('simple', 'OBS_GetCategory', None, 'geom'), ('simple', 'OBS_GetCategory', None, 'offset_geom'), - #('complex', 'OBS_GetMeasure', 'predenominated', 'point'), - #('complex', 'OBS_GetMeasure', 'predenominated', 'geom'), - #('complex', 'OBS_GetMeasure', 'predenominated', 'offset_geom'), - #('complex', 'OBS_GetMeasure', 'area', 'point'), - #('complex', 'OBS_GetMeasure', 'area', 'geom'), - #('complex', 'OBS_GetMeasure', 'area', 'offset_geom'), - #('complex', 'OBS_GetMeasure', 'denominator', 'point'), - #('complex', 'OBS_GetMeasure', 'denominator', 'geom'), - #('complex', 'OBS_GetMeasure', 'denominator', 'offset_geom'), - #('complex', 'OBS_GetCategory', None, 'point'), - #('complex', 'OBS_GetCategory', None, 'geom'), - #('complex', 'OBS_GetCategory', None, 'offset_geom'), + ('complex', 'OBS_GetMeasure', 'predenominated', 'point'), + ('complex', 'OBS_GetMeasure', 'predenominated', 'geom'), + ('complex', 'OBS_GetMeasure', 'predenominated', 'offset_geom'), + ('complex', 'OBS_GetMeasure', 'area', 'point'), + ('complex', 'OBS_GetMeasure', 'area', 'geom'), + ('complex', 'OBS_GetMeasure', 'area', 'offset_geom'), + ('complex', 'OBS_GetMeasure', 'denominator', 'point'), + ('complex', 'OBS_GetMeasure', 'denominator', 'geom'), + ('complex', 'OBS_GetMeasure', 'denominator', 'offset_geom'), + ('complex', 'OBS_GetCategory', None, 'point'), + ('complex', 'OBS_GetCategory', None, 'geom'), + ('complex', 'OBS_GetCategory', None, 'offset_geom'), ]) def test_performance(geom_complexity, api_method, normalization, geom): print api_method, geom_complexity, normalization, geom col = 'measure' if 'measure' in api_method.lower() else 'category' results = [] - for rows in (1, 5, 10, ): + rownums = (1, 5, 10, ) if geom_complexity == 'complex' else (5, 25, 50 ) + for rows in rownums: stmt = '''UPDATE obs_perftest_{complexity} SET {col} = {schema}{api_method}({args}) WHERE cartodb_id < {n}'''.format( From 7373794c306d23257f0aae1eff3a35f294cad49f Mon Sep 17 00:00:00 2001 From: John Krauss Date: Thu, 8 Dec 2016 02:32:03 +0000 Subject: [PATCH 10/14] fix divide-by-zero condition with obs_getmeasure(area) using denominator --- src/pg/sql/41_observatory_augmentation.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pg/sql/41_observatory_augmentation.sql b/src/pg/sql/41_observatory_augmentation.sql index 061bc2a..8e964cd 100644 --- a/src/pg/sql/41_observatory_augmentation.sql +++ b/src/pg/sql/41_observatory_augmentation.sql @@ -493,10 +493,10 @@ BEGIN FROM observatory.%I denom WHERE denom.%I = ANY ((SELECT ARRAY_AGG(geom_ref) FROM _geom)::TEXT[])) SELECT SUM(numer.%I * (SELECT _geom.overlap FROM _geom WHERE _geom.geom_ref = numer.%I)) / - SUM((SELECT _denom.%I * (SELECT _geom.overlap + NullIf(SUM((SELECT _denom.%I * (SELECT _geom.overlap FROM _geom WHERE _geom.geom_ref = _denom.geom_ref) - FROM _denom WHERE _denom.geom_ref = numer.%I)) + FROM _denom WHERE _denom.geom_ref = numer.%I)), 0) FROM observatory.%I numer WHERE numer.%I = ANY ((SELECT ARRAY_AGG(geom_ref) FROM _geom)::TEXT[])', geom_colname, geom_colname, geom_geomref_colname, From 4b9ba06b42cb643ba2e325961cfab3b0745a7115 Mon Sep 17 00:00:00 2001 From: John Krauss Date: Thu, 8 Dec 2016 02:55:53 +0000 Subject: [PATCH 11/14] fix lat/lng switch for brazil --- src/python/test/autotest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/python/test/autotest.py b/src/python/test/autotest.py index c4cb5db..d74d7c2 100644 --- a/src/python/test/autotest.py +++ b/src/python/test/autotest.py @@ -147,7 +147,7 @@ def default_lonlat(column_id): elif column_id.startswith('eu.'): raise SkipTest('No tests for Eurostat!') elif column_id.startswith('br.'): - return (-22.9, -43.19) + return (-43.19, -22.9) else: raise Exception('No catalog point set for {}'.format(column_id)) From 48a8df8b9864292d931109f0ca39154aaa867021 Mon Sep 17 00:00:00 2001 From: John Krauss Date: Thu, 8 Dec 2016 03:13:55 +0000 Subject: [PATCH 12/14] switch brazil testpoint --- src/python/test/autotest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/python/test/autotest.py b/src/python/test/autotest.py index d74d7c2..88b2770 100644 --- a/src/python/test/autotest.py +++ b/src/python/test/autotest.py @@ -147,7 +147,7 @@ def default_lonlat(column_id): elif column_id.startswith('eu.'): raise SkipTest('No tests for Eurostat!') elif column_id.startswith('br.'): - return (-43.19, -22.9) + return (-23.53, -46.63) else: raise Exception('No catalog point set for {}'.format(column_id)) From e33bcae964fd4ddd1a57cf48923ba118ed9f322d Mon Sep 17 00:00:00 2001 From: John Krauss Date: Thu, 8 Dec 2016 03:21:04 +0000 Subject: [PATCH 13/14] add several ignored MX measures likely due to new geometry scoring --- src/python/test/autotest.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/python/test/autotest.py b/src/python/test/autotest.py index 88b2770..897eba0 100644 --- a/src/python/test/autotest.py +++ b/src/python/test/autotest.py @@ -34,6 +34,7 @@ AND 'us.census.acs.acs' = ANY (subsection_tags) AND numer_weight > 0 ''').fetchall() + SKIP_COLUMNS = set([ u'mx.inegi_columns.INDI18', u'mx.inegi_columns.ECO40', @@ -61,6 +62,10 @@ SKIP_COLUMNS = set([ u'mx.inegi_columns.POB33', u'mx.inegi_columns.POB58', u'mx.inegi_columns.DISC4', + u'mx.inegi_columns.VIV41', + u'mx.inegi_columns.VIV40', + u'mx.inegi_columns.VIV17', + u'mx.inegi_columns.EDU10' ]) #def default_geometry_id(column_id): From 99166d1b4ebb796071be42802e3a5c39e8d538c2 Mon Sep 17 00:00:00 2001 From: John Krauss Date: Thu, 8 Dec 2016 21:59:32 +0000 Subject: [PATCH 14/14] update NEWS.md --- NEWS.md | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/NEWS.md b/NEWS.md index abd33a6..073b373 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,29 @@ +1.1.6 (2016-12-08) + +__Bugfixes__ + +* Fix divide by zero condition in "denominator" branch of `OBS_GetMeasure` + when passing in a polygon ([#233](https://github.com/CartoDB/observatory-extension/pull/233)). + +__Improvements__ + +* Use `ST_Subdivide` to improve performance when functions are called on very + complex geometries (with many points) ([#232](https://github.com/CartoDB/observatory-extension/pull/232)) +* Improve raster scoring to more heavily weight boundaries with nearer to + correct number of points, and penalize boundaries with lots of blank space + ([#232](https://github.com/CartoDB/observatory-extension/pull/232)) +* Remove some redundant area calculations in `OBS_GetMeasure` + ([#232](https://github.com/CartoDB/observatory-extension/pull/232)) +* Replace use of `format('%L', var)` with proper use of `EXECUTE` and `$1` etc. + variables ([#231](https://github.com/CartoDB/observatory-extension/pull/231)) +* Add test point for Brazil + ([#229](https://github.com/CartoDB/observatory-extension/pull/229)) +* Improvements to performance tests + ([#229](https://github.com/CartoDB/observatory-extension/pull/229)) + - Support simple and complex geometries + - Handle all code branches + - Add ability to persist results to JSON for graph visualization later + 1.1.5 (2016-11-29) __Bugfixes__