diff --git a/src/python/requirements.txt b/src/python/requirements.txt new file mode 100644 index 0000000..dbab3a3 --- /dev/null +++ b/src/python/requirements.txt @@ -0,0 +1,3 @@ +nose +nose_parameterized +psycopg2 diff --git a/scripts/autotest.py b/src/python/test/autotest.py similarity index 67% rename from scripts/autotest.py rename to src/python/test/autotest.py index 7f0924d..effdf4a 100644 --- a/scripts/autotest.py +++ b/src/python/test/autotest.py @@ -1,77 +1,86 @@ from nose.tools import assert_equal, assert_is_not_none +from nose.plugins.skip import SkipTest from nose_parameterized import parameterized -import os -import re -import requests +from util import query -HOSTNAME = os.environ['OBS_HOSTNAME'] -API_KEY = os.environ['OBS_API_KEY'] -META_HOSTNAME = os.environ.get('OBS_META_HOSTNAME', HOSTNAME) -META_API_KEY = os.environ.get('OBS_META_API_KEY', API_KEY) -USE_SCHEMA = 'OBS_USE_SCHEMA' in os.environ +USE_SCHEMA = True - -def query(q, is_meta=False, **options): - ''' - Query the account. Returned is the response, wrapped by the requests - library. - ''' - url = 'https://{hostname}/api/v2/sql'.format( - hostname=META_HOSTNAME if is_meta else HOSTNAME) - params = options.copy() - params['q'] = re.sub(r'\s+', ' ', q) - params['api_key'] = META_API_KEY if is_meta else API_KEY - return requests.get(url, params=params) - -MEASURE_COLUMNS = [(r['numer_id'], r['point_only'], ) for r in query(''' +MEASURE_COLUMNS = query(''' SELECT distinct numer_id, numer_aggregate NOT ILIKE 'sum' as point_only -FROM obs_meta +FROM observatory.obs_meta WHERE numer_type ILIKE 'numeric' AND numer_weight > 0 -''', is_meta=True).json()['rows']] +''').fetchall() -CATEGORY_COLUMNS = [(r['numer_id'], ) for r in query(''' +CATEGORY_COLUMNS = query(''' SELECT distinct numer_id -FROM obs_meta +FROM observatory.obs_meta WHERE numer_type ILIKE 'text' AND numer_weight > 0 -''', is_meta=True).json()['rows']] +''').fetchall() -BOUNDARY_COLUMNS = [(r['id'], ) for r in query(''' -SELECT id FROM obs_column +BOUNDARY_COLUMNS = query(''' +SELECT id FROM observatory.obs_column WHERE type ILIKE 'geometry' AND weight > 0 -''', is_meta=True).json()['rows']] +''').fetchall() -US_CENSUS_MEASURE_COLUMNS = [(r['numer_name'], ) for r in query(''' +US_CENSUS_MEASURE_COLUMNS = query(''' SELECT distinct numer_name -FROM obs_meta +FROM observatory.obs_meta WHERE numer_type ILIKE 'numeric' AND 'us.census.acs.acs' = ANY (subsection_tags) AND numer_weight > 0 -''', is_meta=True).json()['rows']] +''').fetchall() +SKIP_COLUMNS = set([ + u'mx.inegi_columns.INDI18', + u'mx.inegi_columns.ECO40', + u'mx.inegi_columns.POB34', + u'mx.inegi_columns.POB63', + u'mx.inegi_columns.INDI7', + u'mx.inegi_columns.EDU28', + u'mx.inegi_columns.SCONY10', + u'mx.inegi_columns.EDU31', + u'mx.inegi_columns.POB7', + u'mx.inegi_columns.VIV30', + u'mx.inegi_columns.INDI12', + u'mx.inegi_columns.EDU13', + u'mx.inegi_columns.ECO43', + u'mx.inegi_columns.VIV9', + u'mx.inegi_columns.HOGAR25', + u'mx.inegi_columns.POB32', + u'mx.inegi_columns.ECO7', + u'mx.inegi_columns.INDI19', + u'mx.inegi_columns.INDI16', + u'mx.inegi_columns.POB65', + u'mx.inegi_columns.INDI3', + u'mx.inegi_columns.INDI9', + u'mx.inegi_columns.POB36', + u'mx.inegi_columns.POB33', + u'mx.inegi_columns.POB58', +]) def default_geometry_id(column_id): ''' Returns default test point for the column_id. ''' if column_id == 'whosonfirst.wof_disputed_geom': - return 'CDB_LatLng(33.78, 76.57)' + return 'ST_SetSRID(ST_MakePoint(76.57, 33.78), 4326)' elif column_id == 'whosonfirst.wof_marinearea_geom': - return 'CDB_LatLng(43.33, -68.47)' + return 'ST_SetSRID(ST_MakePoint(-68.47, 43.33), 4326)' elif column_id in ('us.census.tiger.school_district_elementary', 'us.census.tiger.school_district_secondary', 'us.census.tiger.school_district_elementary_clipped', 'us.census.tiger.school_district_secondary_clipped'): - return 'CDB_LatLng(40.7025, -73.7067)' + return 'ST_SetSRID(ST_MakePoint(-73.7067, 40.7025), 4326)' elif column_id.startswith('es.ine'): - return 'CDB_LatLng(42.8226119029222, -2.51141249535454)' + return 'ST_SetSRID(ST_MakePoint(-2.51141249535454, 42.8226119029222), 4326)' elif column_id.startswith('us.zillow'): - return 'CDB_LatLng(28.3305906291771, -81.3544048197256)' + return 'ST_SetSRID(ST_MakePoint(-81.3544048197256, 28.3305906291771), 4326)' else: - return 'CDB_LatLng(40.7, -73.9)' + return 'ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)' def default_point(column_id): @@ -79,27 +88,27 @@ def default_point(column_id): Returns default test point for the column_id. ''' if column_id == 'whosonfirst.wof_disputed_geom': - return 'CDB_LatLng(33.78, 76.57)' + return 'ST_SetSRID(ST_MakePoint(76.57, 33.78), 4326)' elif column_id == 'whosonfirst.wof_marinearea_geom': - return 'CDB_LatLng(43.33, -68.47)' + return 'ST_SetSRID(ST_MakePoint(-68.47, 43.33), 4326)' elif column_id in ('us.census.tiger.school_district_elementary', 'us.census.tiger.school_district_secondary', 'us.census.tiger.school_district_elementary_clipped', 'us.census.tiger.school_district_secondary_clipped'): - return 'CDB_LatLng(40.7025, -73.7067)' + return 'ST_SetSRID(ST_MakePoint(-73.7067, 40.7025), 4326)' elif column_id.startswith('uk'): if 'WA' in column_id: - return 'CDB_LatLng(51.46844551219723, -3.184833526611328)' + return 'ST_SetSRID(ST_MakePoint(-3.184833526611328, 51.46844551219723), 4326)' else: - return 'CDB_LatLng(51.51461834694225, -0.08883476257324219)' + return 'ST_SetSRID(ST_MakePoint(-0.08883476257324219, 51.51461834694225), 4326)' elif column_id.startswith('es'): - return 'CDB_LatLng(42.8226119029222, -2.51141249535454)' + return 'ST_SetSRID(ST_MakePoint(-2.51141249535454, 42.8226119029222), 4326)' elif column_id.startswith('us.zillow'): - return 'CDB_LatLng(28.3305906291771, -81.3544048197256)' + return 'ST_SetSRID(ST_MakePoint(-81.3544048197256, 28.3305906291771), 4326)' elif column_id.startswith('mx.'): - return 'CDB_LatLng(19.41347699386547, -99.17019367218018)' + return 'ST_SetSRID(ST_MakePoint(-99.17019367218018, 19.41347699386547), 4326)' else: - return 'CDB_LatLng(40.7, -73.9)' + return 'ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)' def default_area(column_id): @@ -113,21 +122,20 @@ def default_area(column_id): @parameterized(US_CENSUS_MEASURE_COLUMNS) def test_get_us_census_measure_points(name): - print 'test_get_us_census_measure_points, ', name resp = query(''' SELECT * FROM {schema}OBS_GetUSCensusMeasure({point}, '{name}') '''.format(name=name.replace("'", "''"), schema='cdb_observatory.' if USE_SCHEMA else '', point=default_point(''))) - assert_equal(resp.status_code, 200) - rows = resp.json()['rows'] + rows = resp.fetchall() assert_equal(1, len(rows)) - assert_is_not_none(rows[0].values()[0]) + assert_is_not_none(rows[0][0]) @parameterized(MEASURE_COLUMNS) def test_get_measure_areas(column_id, point_only): - print 'test_get_measure_areas, ', column_id, point_only + if column_id in SKIP_COLUMNS: + raise SkipTest('Column {} should be skipped'.format(column_id)) if point_only: return resp = query(''' @@ -135,24 +143,23 @@ SELECT * FROM {schema}OBS_GetMeasure({area}, '{column_id}') '''.format(column_id=column_id, schema='cdb_observatory.' if USE_SCHEMA else '', area=default_area(column_id))) - assert_equal(resp.status_code, 200) - rows = resp.json()['rows'] + rows = resp.fetchall() assert_equal(1, len(rows)) - assert_is_not_none(rows[0].values()[0]) + assert_is_not_none(rows[0][0]) @parameterized(MEASURE_COLUMNS) def test_get_measure_points(column_id, point_only): - print 'test_get_measure_points, ', column_id, point_only + if column_id in SKIP_COLUMNS: + raise SkipTest('Column {} should be skipped'.format(column_id)) resp = query(''' SELECT * FROM {schema}OBS_GetMeasure({point}, '{column_id}') '''.format(column_id=column_id, schema='cdb_observatory.' if USE_SCHEMA else '', point=default_point(column_id))) - assert_equal(resp.status_code, 200) - rows = resp.json()['rows'] + rows = resp.fetchall() assert_equal(1, len(rows)) - assert_is_not_none(rows[0].values()[0]) + assert_is_not_none(rows[0][0]) #@parameterized(CATEGORY_COLUMNS) #def test_get_category_areas(column_id): @@ -164,20 +171,20 @@ SELECT * FROM {schema}OBS_GetMeasure({point}, '{column_id}') # assert_equal(resp.status_code, 200) # rows = resp.json()['rows'] # assert_equal(1, len(rows)) -# assert_is_not_none(rows[0].values()[0]) +# assert_is_not_none(rows[0][0]) @parameterized(CATEGORY_COLUMNS) def test_get_category_points(column_id): - print 'test_get_category_points, ', column_id + if column_id in SKIP_COLUMNS: + raise SkipTest('Column {} should be skipped'.format(column_id)) resp = query(''' SELECT * FROM {schema}OBS_GetCategory({point}, '{column_id}') '''.format(column_id=column_id, schema='cdb_observatory.' if USE_SCHEMA else '', point=default_point(column_id))) - assert_equal(resp.status_code, 200) - rows = resp.json()['rows'] + rows = resp.fetchall() assert_equal(1, len(rows)) - assert_is_not_none(rows[0].values()[0]) + assert_is_not_none(rows[0][0]) #@parameterized(BOUNDARY_COLUMNS) #def test_get_boundaries_by_geometry(column_id): @@ -189,7 +196,7 @@ SELECT * FROM {schema}OBS_GetCategory({point}, '{column_id}') # assert_equal(resp.status_code, 200) # rows = resp.json()['rows'] # assert_equal(1, len(rows)) -# assert_is_not_none(rows[0].values()[0]) +# assert_is_not_none(rows[0][0]) #@parameterized(BOUNDARY_COLUMNS) #def test_get_points_by_geometry(column_id): @@ -201,7 +208,7 @@ SELECT * FROM {schema}OBS_GetCategory({point}, '{column_id}') # assert_equal(resp.status_code, 200) # rows = resp.json()['rows'] # assert_equal(1, len(rows)) -# assert_is_not_none(rows[0].values()[0]) +# assert_is_not_none(rows[0][0]) #@parameterized(BOUNDARY_COLUMNS) #def test_get_boundary_points(column_id): @@ -213,7 +220,7 @@ SELECT * FROM {schema}OBS_GetCategory({point}, '{column_id}') # assert_equal(resp.status_code, 200) # rows = resp.json()['rows'] # assert_equal(1, len(rows)) -# assert_is_not_none(rows[0].values()[0]) +# assert_is_not_none(rows[0][0]) #@parameterized(BOUNDARY_COLUMNS) #def test_get_boundary_id(column_id): @@ -225,7 +232,7 @@ SELECT * FROM {schema}OBS_GetCategory({point}, '{column_id}') # assert_equal(resp.status_code, 200) # rows = resp.json()['rows'] # assert_equal(1, len(rows)) -# assert_is_not_none(rows[0].values()[0]) +# assert_is_not_none(rows[0][0]) #@parameterized(BOUNDARY_COLUMNS) #def test_get_boundary_by_id(column_id): @@ -237,4 +244,5 @@ SELECT * FROM {schema}OBS_GetCategory({point}, '{column_id}') # assert_equal(resp.status_code, 200) # rows = resp.json()['rows'] # assert_equal(1, len(rows)) -# assert_is_not_none(rows[0].values()[0]) +# assert_is_not_none(rows[0][0]) + diff --git a/src/python/test/perftest.py b/src/python/test/perftest.py new file mode 100644 index 0000000..660767a --- /dev/null +++ b/src/python/test/perftest.py @@ -0,0 +1,62 @@ +from nose.tools import assert_equal, assert_is_not_none +from nose_parameterized import parameterized + +from util import query, commit + +from time import time + +USE_SCHEMA = True + +for q in ( + 'DROP TABLE IF EXISTS obs_censustest', + '''CREATE TABLE obs_censustest (cartodb_id SERIAL PRIMARY KEY, + the_geom GEOMETRY, name TEXT, measure NUMERIC, category TEXT)''', + '''INSERT INTO obs_censustest (the_geom, name) + SELECT * FROM {schema}OBS_GetBoundariesByGeometry( + st_makeenvelope(-74.05437469482422,40.66319159533881, + -73.81885528564453,40.745696344339564, 4326), + 'us.census.tiger.block_group_clipped') As m(the_geom, geoid)''' +): + query(q.format( + schema='cdb_observatory.' if USE_SCHEMA else '', + )) + commit() + + +ARGS = { + 'OBS_GetMeasureByID': "name, 'us.census.acs.B01001002', '{}'", + 'OBS_GetMeasure': "{}, 'us.census.acs.B01001002'", + 'OBS_GetCategory': "{}, 'us.census.spielman_singleton_segments.X10'", +} + +GEOMS = { + 'point': 'ST_PointOnSurface(the_geom)', + 'polygon_match': 'the_geom', + 'polygon_buffered': 'ST_Buffer(the_geom::GEOGRAPHY, 1000)::GEOMETRY(GEOMETRY, 4326)', +} + + +@parameterized([ + ('OBS_GetMeasureByID', 'us.census.tiger.block_group_clipped'), + ('OBS_GetMeasureByID', 'us.census.tiger.county'), + ('OBS_GetMeasure', GEOMS['point']), + ('OBS_GetMeasure', GEOMS['polygon_match']), + ('OBS_GetMeasure', GEOMS['polygon_buffered']), + ('OBS_GetCategory', GEOMS['point']), + ('OBS_GetCategory', GEOMS['polygon_match']), + ('OBS_GetCategory', GEOMS['polygon_buffered']), +]) +def test_performance(api_method, arg): + print api_method, arg + col = 'measure' if 'measure' in api_method.lower() else 'category' + for rows in (1, 10, 50, 100): + q = 'UPDATE obs_censustest SET {col} = {schema}{api_method}({args}) WHERE cartodb_id < {n}'.format( + col=col, + schema='cdb_observatory.' if USE_SCHEMA else '', + api_method=api_method, + args=ARGS[api_method].format(arg), + n=rows+1) + start = time() + query(q) + end = time() + print rows, ': ', (rows / (end - start)), ' QPS' diff --git a/src/python/test/util.py b/src/python/test/util.py new file mode 100644 index 0000000..7311a46 --- /dev/null +++ b/src/python/test/util.py @@ -0,0 +1,31 @@ +import os +import psycopg2 + +DB_CONN = psycopg2.connect('postgres://{user}:{password}@{host}:{port}/{database}'.format( + user=os.environ.get('PGUSER', 'postgres'), + password=os.environ.get('PGPASSWORD', ''), + host=os.environ.get('PGHOST', 'localhost'), + port=os.environ.get('PGPORT', '5432'), + database=os.environ.get('PGDATABASE', 'postgres'), +)) +CURSOR = DB_CONN.cursor() + + +def query(q): + ''' + Query the database. + ''' + try: + CURSOR.execute(q) + return CURSOR + except: + DB_CONN.rollback() + raise + + +def commit(): + try: + DB_CONN.commit() + except: + DB_CONN.rollback() + raise