Merge pull request #182 from CartoDB/sql-tests

SQL Integration and Performance Tests
Mario de Frutos 2016-08-18 14:54:53 +02:00 committed by GitHub
commit 4c13434b9a
7 changed files with 196 additions and 71 deletions


@@ -1,3 +1,10 @@
1.0.5 (2016-08-12)
__Improvements__
* Integration tests moved to `src/python/test/`; they can now be run directly
against the database, without hitting the HTTP SQL API.
1.0.4 (2016-07-26)
__Bugfixes__


@@ -1,5 +1,5 @@
comment = 'CartoDB Observatory backend extension'
default_version = '1.0.4'
default_version = '1.0.5'
requires = 'postgis, postgres_fdw'
superuser = true
schema = cdb_observatory


@@ -0,0 +1,3 @@
nose
nose_parameterized
psycopg2

src/python/test/README.md (new file, 14 lines)

@@ -0,0 +1,14 @@
### Integration/performance tests
Tests here are meant to be run on a box that has an Observatory meta/data dump
loaded, so the extension's API can be exercised against real data.
The local Python environment needs the requirements from `src/python/requirements.txt`
installed.
So that the tests can find and access the correct database, the `PGUSER`, `PGPASSWORD`,
`PGHOST`, `PGPORT` and `PGDATABASE` environment variables should be set.
Tests should be executed as follows:
    nosetests test/autotest.py
    nosetests -s test/perftest.py
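
The connection details come entirely from those `PG*` variables. A minimal sketch, assuming the commands are run from `src/python/` and that the dump lives in a database named `obs_dump` (an illustrative name), of setting them up and starting the same run from Python; `test/util.py` in this PR reads the variables the same way via `os.environ`:

    import os
    import nose

    # Placeholder values for a local setup; adjust to match your box.
    os.environ.setdefault('PGHOST', 'localhost')
    os.environ.setdefault('PGPORT', '5432')
    os.environ.setdefault('PGUSER', 'postgres')
    os.environ.setdefault('PGPASSWORD', '')
    os.environ.setdefault('PGDATABASE', 'obs_dump')  # hypothetical database name

    # Equivalent to `nosetests test/autotest.py` on the command line.
    nose.run(argv=['nosetests', 'test/autotest.py'])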


@@ -1,77 +1,86 @@
from nose.tools import assert_equal, assert_is_not_none
from nose.plugins.skip import SkipTest
from nose_parameterized import parameterized
import os
import re
import requests
from util import query
HOSTNAME = os.environ['OBS_HOSTNAME']
API_KEY = os.environ['OBS_API_KEY']
META_HOSTNAME = os.environ.get('OBS_META_HOSTNAME', HOSTNAME)
META_API_KEY = os.environ.get('OBS_META_API_KEY', API_KEY)
USE_SCHEMA = 'OBS_USE_SCHEMA' in os.environ
USE_SCHEMA = True
def query(q, is_meta=False, **options):
'''
Query the account. Returned is the response, wrapped by the requests
library.
'''
url = 'https://{hostname}/api/v2/sql'.format(
hostname=META_HOSTNAME if is_meta else HOSTNAME)
params = options.copy()
params['q'] = re.sub(r'\s+', ' ', q)
params['api_key'] = META_API_KEY if is_meta else API_KEY
return requests.get(url, params=params)
MEASURE_COLUMNS = [(r['numer_id'], r['point_only'], ) for r in query('''
MEASURE_COLUMNS = query('''
SELECT distinct numer_id, numer_aggregate NOT ILIKE 'sum' as point_only
FROM obs_meta
FROM observatory.obs_meta
WHERE numer_type ILIKE 'numeric'
AND numer_weight > 0
''', is_meta=True).json()['rows']]
''').fetchall()
CATEGORY_COLUMNS = [(r['numer_id'], ) for r in query('''
CATEGORY_COLUMNS = query('''
SELECT distinct numer_id
FROM obs_meta
FROM observatory.obs_meta
WHERE numer_type ILIKE 'text'
AND numer_weight > 0
''', is_meta=True).json()['rows']]
''').fetchall()
BOUNDARY_COLUMNS = [(r['id'], ) for r in query('''
SELECT id FROM obs_column
BOUNDARY_COLUMNS = query('''
SELECT id FROM observatory.obs_column
WHERE type ILIKE 'geometry'
AND weight > 0
''', is_meta=True).json()['rows']]
''').fetchall()
US_CENSUS_MEASURE_COLUMNS = [(r['numer_name'], ) for r in query('''
US_CENSUS_MEASURE_COLUMNS = query('''
SELECT distinct numer_name
FROM obs_meta
FROM observatory.obs_meta
WHERE numer_type ILIKE 'numeric'
AND 'us.census.acs.acs' = ANY (subsection_tags)
AND numer_weight > 0
''', is_meta=True).json()['rows']]
''').fetchall()
SKIP_COLUMNS = set([
u'mx.inegi_columns.INDI18',
u'mx.inegi_columns.ECO40',
u'mx.inegi_columns.POB34',
u'mx.inegi_columns.POB63',
u'mx.inegi_columns.INDI7',
u'mx.inegi_columns.EDU28',
u'mx.inegi_columns.SCONY10',
u'mx.inegi_columns.EDU31',
u'mx.inegi_columns.POB7',
u'mx.inegi_columns.VIV30',
u'mx.inegi_columns.INDI12',
u'mx.inegi_columns.EDU13',
u'mx.inegi_columns.ECO43',
u'mx.inegi_columns.VIV9',
u'mx.inegi_columns.HOGAR25',
u'mx.inegi_columns.POB32',
u'mx.inegi_columns.ECO7',
u'mx.inegi_columns.INDI19',
u'mx.inegi_columns.INDI16',
u'mx.inegi_columns.POB65',
u'mx.inegi_columns.INDI3',
u'mx.inegi_columns.INDI9',
u'mx.inegi_columns.POB36',
u'mx.inegi_columns.POB33',
u'mx.inegi_columns.POB58',
])
def default_geometry_id(column_id):
'''
Returns default test point for the column_id.
'''
if column_id == 'whosonfirst.wof_disputed_geom':
return 'CDB_LatLng(33.78, 76.57)'
return 'ST_SetSRID(ST_MakePoint(76.57, 33.78), 4326)'
elif column_id == 'whosonfirst.wof_marinearea_geom':
return 'CDB_LatLng(43.33, -68.47)'
return 'ST_SetSRID(ST_MakePoint(-68.47, 43.33), 4326)'
elif column_id in ('us.census.tiger.school_district_elementary',
'us.census.tiger.school_district_secondary',
'us.census.tiger.school_district_elementary_clipped',
'us.census.tiger.school_district_secondary_clipped'):
return 'CDB_LatLng(40.7025, -73.7067)'
return 'ST_SetSRID(ST_MakePoint(-73.7067, 40.7025), 4326)'
elif column_id.startswith('es.ine'):
return 'CDB_LatLng(42.8226119029222, -2.51141249535454)'
return 'ST_SetSRID(ST_MakePoint(-2.51141249535454, 42.8226119029222), 4326)'
elif column_id.startswith('us.zillow'):
return 'CDB_LatLng(28.3305906291771, -81.3544048197256)'
return 'ST_SetSRID(ST_MakePoint(-81.3544048197256, 28.3305906291771), 4326)'
else:
return 'CDB_LatLng(40.7, -73.9)'
return 'ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)'
def default_point(column_id):
@@ -79,27 +88,27 @@ def default_point(column_id):
Returns default test point for the column_id.
'''
if column_id == 'whosonfirst.wof_disputed_geom':
return 'CDB_LatLng(33.78, 76.57)'
return 'ST_SetSRID(ST_MakePoint(76.57, 33.78), 4326)'
elif column_id == 'whosonfirst.wof_marinearea_geom':
return 'CDB_LatLng(43.33, -68.47)'
return 'ST_SetSRID(ST_MakePoint(-68.47, 43.33), 4326)'
elif column_id in ('us.census.tiger.school_district_elementary',
'us.census.tiger.school_district_secondary',
'us.census.tiger.school_district_elementary_clipped',
'us.census.tiger.school_district_secondary_clipped'):
return 'CDB_LatLng(40.7025, -73.7067)'
return 'ST_SetSRID(ST_MakePoint(-73.7067, 40.7025), 4326)'
elif column_id.startswith('uk'):
if 'WA' in column_id:
return 'CDB_LatLng(51.46844551219723, -3.184833526611328)'
return 'ST_SetSRID(ST_MakePoint(-3.184833526611328, 51.46844551219723), 4326)'
else:
return 'CDB_LatLng(51.51461834694225, -0.08883476257324219)'
return 'ST_SetSRID(ST_MakePoint(-0.08883476257324219, 51.51461834694225), 4326)'
elif column_id.startswith('es'):
return 'CDB_LatLng(42.8226119029222, -2.51141249535454)'
return 'ST_SetSRID(ST_MakePoint(-2.51141249535454, 42.8226119029222), 4326)'
elif column_id.startswith('us.zillow'):
return 'CDB_LatLng(28.3305906291771, -81.3544048197256)'
return 'ST_SetSRID(ST_MakePoint(-81.3544048197256, 28.3305906291771), 4326)'
elif column_id.startswith('mx.'):
return 'CDB_LatLng(19.41347699386547, -99.17019367218018)'
return 'ST_SetSRID(ST_MakePoint(-99.17019367218018, 19.41347699386547), 4326)'
else:
return 'CDB_LatLng(40.7, -73.9)'
return 'ST_SetSRID(ST_MakePoint(-73.9, 40.7), 4326)'
def default_area(column_id):
@@ -113,21 +122,20 @@ def default_area(column_id):
@parameterized(US_CENSUS_MEASURE_COLUMNS)
def test_get_us_census_measure_points(name):
print 'test_get_us_census_measure_points, ', name
resp = query('''
SELECT * FROM {schema}OBS_GetUSCensusMeasure({point}, '{name}')
'''.format(name=name.replace("'", "''"),
schema='cdb_observatory.' if USE_SCHEMA else '',
point=default_point('')))
assert_equal(resp.status_code, 200)
rows = resp.json()['rows']
rows = resp.fetchall()
assert_equal(1, len(rows))
assert_is_not_none(rows[0].values()[0])
assert_is_not_none(rows[0][0])
@parameterized(MEASURE_COLUMNS)
def test_get_measure_areas(column_id, point_only):
print 'test_get_measure_areas, ', column_id, point_only
if column_id in SKIP_COLUMNS:
raise SkipTest('Column {} should be skipped'.format(column_id))
if point_only:
return
resp = query('''
@@ -135,24 +143,23 @@ SELECT * FROM {schema}OBS_GetMeasure({area}, '{column_id}')
'''.format(column_id=column_id,
schema='cdb_observatory.' if USE_SCHEMA else '',
area=default_area(column_id)))
assert_equal(resp.status_code, 200)
rows = resp.json()['rows']
rows = resp.fetchall()
assert_equal(1, len(rows))
assert_is_not_none(rows[0].values()[0])
assert_is_not_none(rows[0][0])
@parameterized(MEASURE_COLUMNS)
def test_get_measure_points(column_id, point_only):
print 'test_get_measure_points, ', column_id, point_only
if column_id in SKIP_COLUMNS:
raise SkipTest('Column {} should be skipped'.format(column_id))
resp = query('''
SELECT * FROM {schema}OBS_GetMeasure({point}, '{column_id}')
'''.format(column_id=column_id,
schema='cdb_observatory.' if USE_SCHEMA else '',
point=default_point(column_id)))
assert_equal(resp.status_code, 200)
rows = resp.json()['rows']
rows = resp.fetchall()
assert_equal(1, len(rows))
assert_is_not_none(rows[0].values()[0])
assert_is_not_none(rows[0][0])
#@parameterized(CATEGORY_COLUMNS)
#def test_get_category_areas(column_id):
@@ -164,20 +171,20 @@ SELECT * FROM {schema}OBS_GetMeasure({point}, '{column_id}')
# assert_equal(resp.status_code, 200)
# rows = resp.json()['rows']
# assert_equal(1, len(rows))
# assert_is_not_none(rows[0].values()[0])
# assert_is_not_none(rows[0][0])
@parameterized(CATEGORY_COLUMNS)
def test_get_category_points(column_id):
print 'test_get_category_points, ', column_id
if column_id in SKIP_COLUMNS:
raise SkipTest('Column {} should be skipped'.format(column_id))
resp = query('''
SELECT * FROM {schema}OBS_GetCategory({point}, '{column_id}')
'''.format(column_id=column_id,
schema='cdb_observatory.' if USE_SCHEMA else '',
point=default_point(column_id)))
assert_equal(resp.status_code, 200)
rows = resp.json()['rows']
rows = resp.fetchall()
assert_equal(1, len(rows))
assert_is_not_none(rows[0].values()[0])
assert_is_not_none(rows[0][0])
#@parameterized(BOUNDARY_COLUMNS)
#def test_get_boundaries_by_geometry(column_id):
@@ -189,7 +196,7 @@ SELECT * FROM {schema}OBS_GetCategory({point}, '{column_id}')
# assert_equal(resp.status_code, 200)
# rows = resp.json()['rows']
# assert_equal(1, len(rows))
# assert_is_not_none(rows[0].values()[0])
# assert_is_not_none(rows[0][0])
#@parameterized(BOUNDARY_COLUMNS)
#def test_get_points_by_geometry(column_id):
@@ -201,7 +208,7 @@ SELECT * FROM {schema}OBS_GetCategory({point}, '{column_id}')
# assert_equal(resp.status_code, 200)
# rows = resp.json()['rows']
# assert_equal(1, len(rows))
# assert_is_not_none(rows[0].values()[0])
# assert_is_not_none(rows[0][0])
#@parameterized(BOUNDARY_COLUMNS)
#def test_get_boundary_points(column_id):
@@ -213,7 +220,7 @@ SELECT * FROM {schema}OBS_GetCategory({point}, '{column_id}')
# assert_equal(resp.status_code, 200)
# rows = resp.json()['rows']
# assert_equal(1, len(rows))
# assert_is_not_none(rows[0].values()[0])
# assert_is_not_none(rows[0][0])
#@parameterized(BOUNDARY_COLUMNS)
#def test_get_boundary_id(column_id):
@@ -225,7 +232,7 @@ SELECT * FROM {schema}OBS_GetCategory({point}, '{column_id}')
# assert_equal(resp.status_code, 200)
# rows = resp.json()['rows']
# assert_equal(1, len(rows))
# assert_is_not_none(rows[0].values()[0])
# assert_is_not_none(rows[0][0])
#@parameterized(BOUNDARY_COLUMNS)
#def test_get_boundary_by_id(column_id):
@@ -237,4 +244,5 @@ SELECT * FROM {schema}OBS_GetCategory({point}, '{column_id}')
# assert_equal(resp.status_code, 200)
# rows = resp.json()['rows']
# assert_equal(1, len(rows))
# assert_is_not_none(rows[0].values()[0])
# assert_is_not_none(rows[0][0])


@@ -0,0 +1,62 @@
from nose.tools import assert_equal, assert_is_not_none
from nose_parameterized import parameterized
from util import query, commit
from time import time
USE_SCHEMA = True
for q in (
'DROP TABLE IF EXISTS obs_censustest',
'''CREATE TABLE obs_censustest (cartodb_id SERIAL PRIMARY KEY,
the_geom GEOMETRY, name TEXT, measure NUMERIC, category TEXT)''',
'''INSERT INTO obs_censustest (the_geom, name)
SELECT * FROM {schema}OBS_GetBoundariesByGeometry(
st_makeenvelope(-74.05437469482422,40.66319159533881,
-73.81885528564453,40.745696344339564, 4326),
'us.census.tiger.block_group_clipped') As m(the_geom, geoid)'''
):
query(q.format(
schema='cdb_observatory.' if USE_SCHEMA else '',
))
commit()
ARGS = {
'OBS_GetMeasureByID': "name, 'us.census.acs.B01001002', '{}'",
'OBS_GetMeasure': "{}, 'us.census.acs.B01001002'",
'OBS_GetCategory': "{}, 'us.census.spielman_singleton_segments.X10'",
}
GEOMS = {
'point': 'ST_PointOnSurface(the_geom)',
'polygon_match': 'the_geom',
'polygon_buffered': 'ST_Buffer(the_geom::GEOGRAPHY, 1000)::GEOMETRY(GEOMETRY, 4326)',
}
@parameterized([
('OBS_GetMeasureByID', 'us.census.tiger.block_group_clipped'),
('OBS_GetMeasureByID', 'us.census.tiger.county'),
('OBS_GetMeasure', GEOMS['point']),
('OBS_GetMeasure', GEOMS['polygon_match']),
('OBS_GetMeasure', GEOMS['polygon_buffered']),
('OBS_GetCategory', GEOMS['point']),
('OBS_GetCategory', GEOMS['polygon_match']),
('OBS_GetCategory', GEOMS['polygon_buffered']),
])
def test_performance(api_method, arg):
print api_method, arg
col = 'measure' if 'measure' in api_method.lower() else 'category'
for rows in (1, 10, 50, 100):
q = 'UPDATE obs_censustest SET {col} = {schema}{api_method}({args}) WHERE cartodb_id < {n}'.format(
col=col,
schema='cdb_observatory.' if USE_SCHEMA else '',
api_method=api_method,
args=ARGS[api_method].format(arg),
n=rows+1)
start = time()
query(q)
end = time()
print rows, ': ', (rows / (end - start)), ' QPS'
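
The figure printed on that last line is rows updated per elapsed second for a single UPDATE statement (labelled QPS). The arithmetic, with made-up numbers:

    # Illustrative numbers only: 100 rows updated in 2.5 seconds of wall time.
    rows = 100
    elapsed = 2.5            # end - start, in seconds
    print(rows / elapsed)    # 40.0, printed as "100 :  40.0  QPS"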

src/python/test/util.py (new file, 31 lines)

@@ -0,0 +1,31 @@
import os
import psycopg2
DB_CONN = psycopg2.connect('postgres://{user}:{password}@{host}:{port}/{database}'.format(
user=os.environ.get('PGUSER', 'postgres'),
password=os.environ.get('PGPASSWORD', ''),
host=os.environ.get('PGHOST', 'localhost'),
port=os.environ.get('PGPORT', '5432'),
database=os.environ.get('PGDATABASE', 'postgres'),
))
CURSOR = DB_CONN.cursor()
def query(q):
'''
Query the database.
'''
try:
CURSOR.execute(q)
return CURSOR
except:
DB_CONN.rollback()
raise
def commit():
try:
DB_CONN.commit()
except:
DB_CONN.rollback()
raise
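
A minimal usage sketch of these helpers, following the same pattern as the tests above: `query()` returns the shared cursor, so results come back as tuples from `fetchall()`, and writes only persist after `commit()`. The table name in the last step is purely illustrative.

    from util import query, commit

    # Read-only query: fetch a row count from the Observatory schema.
    rows = query('SELECT COUNT(*) FROM observatory.obs_column').fetchall()
    print(rows[0][0])

    # Writes go through the same cursor and must be committed explicitly.
    query('CREATE TABLE IF NOT EXISTS obs_smoke_test (id INT)')  # illustrative table name
    commit()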