use obs_column_table_tile_raster with simpler bands for faster performance

This commit is contained in:
John Krauss 2016-12-12 21:25:59 +00:00
parent 82137d5679
commit d4e6e7ac95
6 changed files with 504 additions and 313 deletions

View File

@ -61,7 +61,8 @@ def get_tablename_query(column_id, boundary_id, timespan):
METADATA_TABLES = ['obs_table', 'obs_column_table', 'obs_column', 'obs_column_tag',
'obs_tag', 'obs_column_to_column', 'obs_dump_version', 'obs_meta',
'obs_meta_numer', 'obs_meta_denom', 'obs_meta_geom',
'obs_meta_timespan', 'obs_column_table_tile', ]
'obs_meta_timespan', 'obs_column_table_tile',
'obs_column_table_tile_simple']
FIXTURES = [
('us.census.acs.B01003001_quantile', 'us.census.tiger.census_tract', '2010 - 2014'),
@ -290,7 +291,8 @@ def main():
"column_id IN ('{}', '{}')".format(numer_id, geom_id)
for numer_id, geom_id, timespan in FIXTURES
])
elif tablename in ('obs_column_table', 'obs_column_table_tile', ):
elif tablename in ('obs_column_table', 'obs_column_table_tile',
'obs_column_table_tile_simple'):
where = 'WHERE column_id IN ({numer_ids}) ' \
'OR column_id IN ({geom_ids}) ' \
'OR table_id IN ({table_ids}) '.format(

View File

@ -412,11 +412,6 @@ BEGIN
END
$$ LANGUAGE plpgsql;
--create table observatory.obs_column_table_tile_simple as select table_id, column_id, tile_id, st_band(tile, 2) as tile from observatory.obs_column_table_tile ;
--create table observatory.obs_column_table_tile_simpler as select table_id,
--column_id, tile_id, st_reclass(tile, '0-65536:0-65536', '16BUI') as tile
--from observatory.obs_column_table_tile_simple ;
CREATE OR REPLACE FUNCTION cdb_observatory._OBS_GetGeometryScores(
bounds Geometry(Geometry, 4326) DEFAULT NULL,
@ -440,66 +435,62 @@ BEGIN
bounds := ST_Envelope(bounds);
END IF;
RETURN QUERY
EXECUTE format($string$
SELECT
((100.0 / (1+abs(log(1 + $3) - log(1 + numgeoms)))) * percentfill)::Numeric
AS score, *
FROM (
WITH clipped_geom AS (
SELECT column_id, table_id
--, CASE WHEN $1 IS NOT NULL THEN ST_Clip(tile, 2, $1, True)
, CASE WHEN $1 IS NOT NULL THEN ST_Clip(tile, $1, True)
ELSE tile END clipped_tile
--, tile as clipped_tile
, tile
FROM observatory.obs_column_table_tile
WHERE ($1 IS NULL OR ST_Intersects($1, tile))
AND (column_id = ANY($2) OR cardinality($2) = 0)
), clipped_geom_countagg AS (
SELECT column_id, table_id
, ST_CountAgg(clipped_tile, 2, True)::Numeric notnull_pixels
, ST_CountAgg(clipped_tile, 2, False)::Numeric pixels
FROM clipped_geom
GROUP BY column_id, table_id
), clipped_geom_reagg AS (
SELECT COUNT(*)::BIGINT cnt, a.column_id,
cdb_observatory.FIRST(pixels) first_pixel,
cdb_observatory.FIRST(notnull_pixels) first_notnull_pixel,
cdb_observatory.FIRST(tile) first_tile,
(ST_SummaryStatsAgg(clipped_tile, 2, True)).sum sum_geoms,
(ST_SummaryStatsAgg(clipped_tile, 3, True)).mean mean_fill
FROM clipped_geom_countagg a, clipped_geom b
WHERE a.table_id = b.table_id
AND a.column_id = b.column_id
GROUP BY a.column_id, a.table_id
) SELECT
EXECUTE $string$
WITH clipped_geom AS (
SELECT column_id, table_id
, CASE WHEN $1 IS NOT NULL THEN ST_Clip(tile, $1, True) -- -20
ELSE tile END clipped_tile
, tile
FROM observatory.obs_column_table_tile_simple
WHERE ($1 IS NULL OR ST_Intersects($1, tile))
AND (column_id = ANY($2) OR cardinality($2) = 0)
), clipped_geom_countagg AS (
SELECT column_id, table_id
, ST_CountAgg(clipped_tile, 1, True)::Numeric notnull_pixels -- -10
, ST_CountAgg(clipped_tile, 1, False)::Numeric pixels -- -10
FROM clipped_geom
GROUP BY column_id, table_id
), clipped_geom_reagg AS (
SELECT COUNT(*)::BIGINT cnt, a.column_id,
cdb_observatory.FIRST(pixels) first_pixel,
cdb_observatory.FIRST(notnull_pixels) first_notnull_pixel,
cdb_observatory.FIRST(tile) first_tile,
(ST_SummaryStatsAgg(clipped_tile, 1, True)).sum::Numeric sum_geoms, -- ND
(ST_SummaryStatsAgg(clipped_tile, 2, True)).mean::Numeric / 255 mean_fill --ND
FROM clipped_geom_countagg a, clipped_geom b
WHERE a.table_id = b.table_id
AND a.column_id = b.column_id
GROUP BY a.column_id, a.table_id
), final AS (
SELECT
cnt, column_id
, (CASE WHEN first_notnull_pixel > 0
THEN first_notnull_pixel / first_pixel
ELSE 1
END)::Numeric AS notnull_percent
END)::Numeric
AS notnull_percent
, (CASE WHEN first_notnull_pixel > 0
THEN sum_geoms
ELSE COALESCE(ST_Value(first_tile, 2, ST_PointOnSurface($1)), 0)
* (ST_Area($1) / ST_Area(ST_PixelAsPolygon(first_tile, 0, 0)) * first_pixel)
END)::Numeric AS numgeoms
ELSE COALESCE(ST_Value(first_tile, 1, ST_PointOnSurface($1)), 0)
* (ST_Area($1) / ST_Area(ST_PixelAsPolygon(first_tile, 0, 0))
* first_pixel) -- -20
END)::Numeric
AS numgeoms
, (CASE WHEN first_notnull_pixel > 0
THEN mean_fill
ELSE COALESCE(ST_Value(first_tile, 3, ST_PointOnSurface($1)), 0)
END)::Numeric AS percentfill
, null::numeric estnumgeoms, null::numeric meanmediansize
--, ((ST_Area(ST_Transform($1, 3857)) / 1000000) / NullIf(
-- CASE WHEN cdb_observatory.FIRST(notnull_pixels) > 0
-- THEN (ST_SummaryStatsAgg(clipped_tile, 1, True)).mean
-- ELSE Coalesce(ST_Value(cdb_observatory.FIRST(tile), 1, ST_PointOnSurface($1)), 0)
-- END, 0))::Numeric AS estnumgeoms
--, (CASE WHEN cdb_observatory.FIRST(notnull_pixels) > 0
-- THEN (ST_SummaryStatsAgg(clipped_tile, 1, True)).mean
-- ELSE COALESCE(ST_Value(cdb_observatory.FIRST(tile), 1, ST_PointOnSurface($1)), 0)
-- END)::Numeric AS meanmediansize
ELSE COALESCE(ST_Value(first_tile, 2, ST_PointOnSurface($1))::Numeric / 255, 0) -- -2
END)::Numeric
AS percentfill
, null::numeric estnumgeoms
, null::numeric meanmediansize
FROM clipped_geom_reagg
) foo
$string$) USING bounds, filter_geom_ids, desired_num_geoms;
) SELECT
((100.0 / (1+abs(log(1 + $3) - log(1 + numgeoms::Numeric)))) * percentfill)::Numeric
AS score, *
FROM final
$string$ USING bounds, filter_geom_ids, desired_num_geoms;
RETURN;
END
$$ LANGUAGE plpgsql IMMUTABLE;

View File

@ -13,6 +13,7 @@ DROP TABLE IF EXISTS observatory.obs_meta_denom;
DROP TABLE IF EXISTS observatory.obs_meta_geom;
DROP TABLE IF EXISTS observatory.obs_meta_timespan;
DROP TABLE IF EXISTS observatory.obs_column_table_tile;
DROP TABLE IF EXISTS observatory.obs_column_table_tile_simple;
DROP TABLE IF EXISTS observatory.obs_fcd4e4f5610f6764973ef8c0c215b2e80bec8963;
DROP TABLE IF EXISTS observatory.obs_c6fb99c47d61289fbb8e561ff7773799d3fcc308;
DROP TABLE IF EXISTS observatory.obs_6c1309a64d8f3e6986061f4d1ca7b57743e75e74;

File diff suppressed because one or more lines are too long

View File

@ -412,7 +412,7 @@ SELECT JSON_Object_Agg(geom_id, numgeoms::int ORDER BY numgeoms DESC)::Text =
'us.census.tiger.zcta5', 'us.census.tiger.county']);
SELECT JSON_Object_Agg(geom_id, numgeoms::int ORDER BY numgeoms DESC)::Text =
'{ "us.census.tiger.block_group" : 12112, "us.census.tiger.census_tract" : 3792, "us.census.tiger.zcta5" : 550, "us.census.tiger.county" : 13 }'
'{ "us.census.tiger.block_group" : 12112, "us.census.tiger.census_tract" : 3792, "us.census.tiger.zcta5" : 550, "us.census.tiger.county" : 14 }'
AS _obs_geometryscores_numgeoms_50km_buffer
FROM cdb_observatory._OBS_GetGeometryScores(
ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 50000)::Geometry(Geometry, 4326),
@ -420,7 +420,7 @@ SELECT JSON_Object_Agg(geom_id, numgeoms::int ORDER BY numgeoms DESC)::Text =
'us.census.tiger.zcta5', 'us.census.tiger.county']);
SELECT JSON_Object_Agg(geom_id, numgeoms::int ORDER BY numgeoms DESC)::Text =
'{ "us.census.tiger.block_group" : 48415, "us.census.tiger.census_tract" : 15776, "us.census.tiger.zcta5" : 6534, "us.census.tiger.county" : 295 }'
'{ "us.census.tiger.block_group" : 48420, "us.census.tiger.census_tract" : 15774, "us.census.tiger.zcta5" : 6533, "us.census.tiger.county" : 304 }'
AS _obs_geometryscores_numgeoms_500km_buffer
FROM cdb_observatory._OBS_GetGeometryScores(
ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 500000)::Geometry(Geometry, 4326),
@ -428,7 +428,7 @@ SELECT JSON_Object_Agg(geom_id, numgeoms::int ORDER BY numgeoms DESC)::Text =
'us.census.tiger.zcta5', 'us.census.tiger.county']);
SELECT JSON_Object_Agg(geom_id, numgeoms::int ORDER BY numgeoms DESC)::Text =
'{ "us.census.tiger.block_group" : 165489, "us.census.tiger.census_tract" : 55152, "us.census.tiger.zcta5" : 26500, "us.census.tiger.county" : 2551 }'
'{ "us.census.tiger.block_group" : 165475, "us.census.tiger.census_tract" : 55128, "us.census.tiger.zcta5" : 26499, "us.census.tiger.county" : 2622 }'
AS _obs_geometryscores_numgeoms_2500km_buffer
FROM cdb_observatory._OBS_GetGeometryScores(
ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 2500000)::Geometry(Geometry, 4326),
@ -436,8 +436,8 @@ SELECT JSON_Object_Agg(geom_id, numgeoms::int ORDER BY numgeoms DESC)::Text =
'us.census.tiger.zcta5', 'us.census.tiger.county']);
SELECT ARRAY_AGG(geom_id ORDER BY score DESC) =
ARRAY['us.census.tiger.county', 'us.census.tiger.census_tract',
'us.census.tiger.zcta5', 'us.census.tiger.block_group']
ARRAY['us.census.tiger.county', 'us.census.tiger.zcta5',
'us.census.tiger.census_tract', 'us.census.tiger.block_group']
AS _obs_geometryscores_500km_buffer_50_geoms
FROM cdb_observatory._OBS_GetGeometryScores(
ST_Buffer(ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)::Geography, 50000)::Geometry(Geometry, 4326),

View File

@ -45,6 +45,30 @@ for q in (
'us.census.tiger.county_clipped')) foo
ORDER BY ST_NPoints(the_geom) DESC
LIMIT 50;''',
'DROP TABLE IF EXISTS obs_perftest_country_simple',
'''CREATE TABLE obs_perftest_country_simple (cartodb_id SERIAL PRIMARY KEY,
geom GEOMETRY,
name TEXT) ''',
'''INSERT INTO obs_perftest_country_simple (geom, name)
SELECT the_geom geom,
geom_refs AS name
FROM (SELECT * FROM {schema}OBS_GetBoundariesByGeometry(
st_makeenvelope(-179,-89, 179,89, 4326),
'whosonfirst.wof_country_geom')) foo
ORDER BY ST_NPoints(the_geom) ASC
LIMIT 50;''',
'DROP TABLE IF EXISTS obs_perftest_country_complex',
'''CREATE TABLE obs_perftest_country_complex (cartodb_id SERIAL PRIMARY KEY,
geom GEOMETRY,
name TEXT) ''',
'''INSERT INTO obs_perftest_country_complex (geom, name)
SELECT the_geom geom,
geom_refs AS name
FROM (SELECT * FROM {schema}OBS_GetBoundariesByGeometry(
st_makeenvelope(-179,-89, 179,89, 4326),
'whosonfirst.wof_country_geom')) foo
ORDER BY ST_NPoints(the_geom) DESC
LIMIT 50;''',
#'''SET statement_timeout = 5000;'''
):
query(q.format(
@ -59,6 +83,7 @@ ARGS = {
('OBS_GetMeasure', 'area'): "{}, 'us.census.acs.B01001002', 'area'",
('OBS_GetMeasure', 'denominator'): "{}, 'us.census.acs.B01001002', 'denominator'",
('OBS_GetCategory', None): "{}, 'us.census.spielman_singleton_segments.X10'",
('_OBS_GetGeometryScores', None): "{}, NULL"
}
@ -76,6 +101,56 @@ def record(params, results):
}
json.dump(tests, fhandle)
@parameterized([
('simple', '_OBS_GetGeometryScores', 'NULL', 1),
('simple', '_OBS_GetGeometryScores', 'NULL', 500),
('simple', '_OBS_GetGeometryScores', 'NULL', 3000),
('complex', '_OBS_GetGeometryScores', 'NULL', 1),
('complex', '_OBS_GetGeometryScores', 'NULL', 500),
('complex', '_OBS_GetGeometryScores', 'NULL', 3000),
('country_simple', '_OBS_GetGeometryScores', 'NULL', 1),
('country_simple', '_OBS_GetGeometryScores', 'NULL', 500),
('country_simple', '_OBS_GetGeometryScores', 'NULL', 5000),
('country_complex', '_OBS_GetGeometryScores', 'NULL', 1),
('country_complex', '_OBS_GetGeometryScores', 'NULL', 500),
('country_complex', '_OBS_GetGeometryScores', 'NULL', 5000),
])
def test_getgeometryscores_performance(geom_complexity, api_method, filters, target_geoms):
print api_method, geom_complexity, filters, target_geoms
rownums = (1, 5, 10, ) if 'complex' in geom_complexity else (5, 25, 50,)
results = []
for rows in rownums:
stmt = '''SELECT {schema}{api_method}(geom, {filters}, {target_geoms})
FROM obs_perftest_{complexity}
WHERE cartodb_id < {n}'''.format(
complexity=geom_complexity,
schema='cdb_observatory.' if USE_SCHEMA else '',
api_method=api_method,
filters=filters,
target_geoms=target_geoms,
n=rows+1)
start = time()
query(stmt)
end = time()
qps = (rows / (end - start))
results.append({
'rows': rows,
'qps': qps,
'stmt': stmt
})
print rows, ': ', qps, ' QPS'
if 'OBS_RECORD_TEST' in os.environ:
record({
'geom_complexity': geom_complexity,
'api_method': api_method,
'filters': filters,
'target_geoms': target_geoms
}, results)
@parameterized([
('simple', 'OBS_GetMeasureByID', None, 'us.census.tiger.census_tract'),
@ -107,7 +182,7 @@ def record(params, results):
('complex', 'OBS_GetCategory', None, 'geom'),
('complex', 'OBS_GetCategory', None, 'offset_geom'),
])
def test_performance(geom_complexity, api_method, normalization, geom):
def test_getmeasure_performance(geom_complexity, api_method, normalization, geom):
print api_method, geom_complexity, normalization, geom
col = 'measure' if 'measure' in api_method.lower() else 'category'
results = []