dataservices-api/test/integration/test_street_functions.py
2018-09-13 16:01:53 +02:00

444 lines
20 KiB
Python

#!/usr/local/bin/python
# -*- coding: utf-8 -*-
from unittest import TestCase
from nose.tools import assert_not_equal, assert_equal, assert_true
from ..helpers.integration_test_helper import IntegrationTestHelper
from ..helpers.integration_test_helper import assert_close_enough, isclose
class TestStreetFunctionsSetUp(TestCase):
    """Shared fixtures and helpers for the street geocoding integration tests.

    Holds, per geocoding provider, the coordinates and metadata the tests
    expect, and selects in ``setUp`` the set matching the provider that
    ``cdb_service_quota_info()`` reports for the ``hires_geocoder`` service.
    """

    # Provider reported for the 'hires_geocoder' service. Resolved on first
    # use and cached on this class (see setUp).
    provider = None
    # Expected [st_x, st_y] per address for the active provider. Filled in
    # setUp unless a subclass presets it.
    fixture_points = None

    GOOGLE_POINTS = {
        'Plaza Mayor 1, Valladolid': [-4.728252, 41.6517025],
        'Paseo Zorrilla, Valladolid': [-4.7404453, 41.6314339],
        '1900 amphitheatre parkway': [-122.0875324, 37.4227968],
        '1901 amphitheatre parkway': [-122.0885504, 37.4238657],
        '1902 amphitheatre parkway': [-122.0876674, 37.4235729],
        'Valladolid': [-4.7245321, 41.652251],
        'Valladolid, Spain': [-4.7245321, 41.652251],
        'Valladolid, Mexico': [-88.2022488, 20.68964],
        'Madrid': [-3.7037902, 40.4167754],
        'Logroño, Spain': [-2.4449852, 42.4627195],
        'Logroño, Argentina': [-61.6961807, -29.5031057],
        'Plaza España, Barcelona': [2.1482563, 41.375485]
    }

    HERE_POINTS = {
        'Plaza Mayor 1, Valladolid': [-4.729, 41.65258],
        'Paseo Zorrilla, Valladolid': [-4.73869, 41.63817],
        '1900 amphitheatre parkway': [-122.0879468, 37.4234763],
        '1901 amphitheatre parkway': [-122.0879253, 37.4238725],
        '1902 amphitheatre parkway': [-122.0879531, 37.4234775],
        'Valladolid': [-4.73214, 41.6542],
        'Valladolid, Spain': [-4.73214, 41.6542],
        'Valladolid, Mexico': [-88.20117, 20.69021],
        'Madrid': [-3.70578, 40.42028],
        'Logroño, Spain': [-2.45194, 42.46592],
        'Logroño, Argentina': [-61.69604, -29.50425],
        'Plaza España, Barcelona': [2.14834, 41.37494]
    }

    # TomTom expectations start from the HERE values and override the
    # entries listed below.
    TOMTOM_POINTS = HERE_POINTS.copy()
    TOMTOM_POINTS.update({
        'Plaza Mayor 1, Valladolid': [-4.7286, 41.6523],
        'Paseo Zorrilla, Valladolid': [-4.74031, 41.63181],
        'Valladolid': [-4.72838, 41.6542],
        'Valladolid, Spain': [-4.72838, 41.6542],
        'Madrid': [-3.70035, 40.42028],
        'Logroño, Spain': [-2.44998, 42.46592],
        'Plaza España, Barcelona': [2.14856, 41.37516]
    })

    # Mapbox expectations start from the Google values and override the
    # entries listed below.
    MAPBOX_POINTS = GOOGLE_POINTS.copy()
    MAPBOX_POINTS.update({
        'Logroño, Spain': [-2.44556, 42.47],
        'Logroño, Argentina': [-70.687195, -33.470901],  # TODO: huge mismatch
        'Valladolid': [-4.72856, 41.652251],
        'Valladolid, Spain': [-4.72856, 41.652251],
        '1902 amphitheatre parkway': [-118.03, 34.06],  # TODO: huge mismatch
        'Madrid': [-3.69194, 40.4167754],
        'Plaza España, Barcelona': [2.342231, 41.50677]  # TODO: not ideal
    })

    # Expected points keyed by provider name.
    FIXTURE_POINTS = {
        'google': GOOGLE_POINTS,
        'heremaps': HERE_POINTS,
        'tomtom': TOMTOM_POINTS,
        'mapbox': MAPBOX_POINTS
    }

    GOOGLE_METADATAS = {
        'Plaza España, Barcelona':
            {'relevance': 0.9, 'precision': 'precise',
             'match_types': ['point_of_interest']},
        'Santiago Rusiñol 123, Valladolid':
            {'relevance': 0.56, 'precision': 'interpolated',
             'match_types': ['locality']}
    }

    HERE_METADATAS = {
        'Plaza España, Barcelona':
            {'relevance': 1, 'precision': 'precise',
             'match_types': ['street']},
        # Wrong. See https://stackoverflow.com/questions/51285622/missing-matchtype-at-here-geocoding-responses
        'Santiago Rusiñol 123, Valladolid':
            {'relevance': 0.89, 'precision': 'precise',
             'match_types': ['street']}
    }

    TOMTOM_METADATAS = {
        'Plaza España, Barcelona':
            {'relevance': 0.85, 'precision': 'precise',
             'match_types': ['street']},
        'Santiago Rusiñol 123, Valladolid':
            {'relevance': 0.45, 'precision': 'interpolated',
             'match_types': ['street']}
    }

    MAPBOX_METADATAS = {
        'Plaza España, Barcelona':
            {'relevance': 0.67, 'precision': 'precise',
             'match_types': ['point_of_interest']},
        # TODO: wrong
        'Santiago Rusiñol 123, Valladolid':
            {'relevance': 0.67, 'precision': 'precise',
             'match_types': ['point_of_interest']}
    }

    # Expected metadata keyed by provider name.
    METADATAS = {
        'google': GOOGLE_METADATAS,
        'heremaps': HERE_METADATAS,
        'tomtom': TOMTOM_METADATAS,
        'mapbox': MAPBOX_METADATAS
    }

    def setUp(self):
        """Load the environment, build the SQL API URL and pick fixtures.

        unittest creates a fresh instance for every test method, so a value
        cached on ``self`` never survives to the next test. The provider is
        therefore cached on ``TestStreetFunctionsSetUp`` and looked up only
        once. ``fixture_points`` and ``metadata`` are derived from it.

        Raises:
            KeyError: if the reported provider has no fixtures.
        """
        self.env_variables = IntegrationTestHelper.get_environment_variables()
        self.sql_api_url = "{0}://{1}.{2}/api/v1/sql".format(
            self.env_variables['schema'],
            self.env_variables['username'],
            self.env_variables['host'],
        )
        if not self.provider:
            TestStreetFunctionsSetUp.provider = self._configured_provider()
        if not self.fixture_points:
            self.fixture_points = self.FIXTURE_POINTS[self.provider]
        # Set unconditionally so it exists even when fixture_points was
        # preset by a subclass.
        self.metadata = self.METADATAS[self.provider]

    def _configured_provider(self):
        """Return the provider reported for the 'hires_geocoder' service."""
        query = "select provider from " \
                "cdb_dataservices_client.cdb_service_quota_info() " \
                "where service = 'hires_geocoder'"
        return self._run_authenticated(query)['rows'][0]['provider']

    def _run_authenticated(self, query):
        """Run ``query`` with the configured API key appended.

        Returns whatever ``IntegrationTestHelper.execute_query_raw`` returns;
        callers in this file index it with ``['rows']``.
        """
        authenticated_query = "{}&api_key={}".format(
            query, self.env_variables['api_key'])
        return IntegrationTestHelper.execute_query_raw(self.sql_api_url,
                                                       authenticated_query)

    def _used_quota(self):
        """Return ``used_quota`` reported for the 'hires_geocoder' service."""
        query = "select used_quota " \
                "from cdb_dataservices_client.cdb_service_quota_info() " \
                "where service = 'hires_geocoder'"
        return self._run_authenticated(query)['rows'][0]['used_quota']


class TestStreetFunctions(TestStreetFunctionsSetUp):
    """Integration tests for ``cdb_geocode_street_point``."""

    def test_if_select_with_street_point_is_ok(self):
        """Geocoding a street from the test table returns a geometry."""
        query = "SELECT cdb_dataservices_client.cdb_geocode_street_point(street) " \
                "as geometry FROM {0} LIMIT 1&api_key={1}".format(
                    self.env_variables['table_name'],
                    self.env_variables['api_key'])
        geometry = IntegrationTestHelper.execute_query(self.sql_api_url, query)
        assert_not_equal(geometry['geometry'], None)

    def test_if_select_with_street_without_api_key_raise_error(self):
        """The same query without an API key must be rejected."""
        table = self.env_variables['table_name']
        query = "SELECT cdb_dataservices_client.cdb_geocode_street_point(street) " \
                "as geometry FROM {0} LIMIT 1".format(table)
        try:
            IntegrationTestHelper.execute_query(self.sql_api_url, query)
        except Exception as e:
            # NOTE(review): ``e.message`` only exists on Python 2; revisit
            # if this suite is ever run on Python 3.
            assert_equal(e.message[0],
                         "permission denied for relation {}".format(table))
        else:
            # Without this branch the test passed silently whenever the
            # unauthenticated query was accepted.
            self.fail("Query without api_key did not raise an error")

    def test_component_aggregation(self):
        """Street, city and country components resolve to the expected point."""
        query = "select st_x(the_geom), st_y(the_geom) from (" \
                "select cdb_dataservices_client.cdb_geocode_street_point( " \
                "'Plaza España', 'Barcelona', null, 'Spain') as the_geom) _x"
        response = self._run_authenticated(query)
        row = response['rows'][0]
        x_y = [row['st_x'], row['st_y']]
        # Wrong coordinates (Plaza España, Madrid): [-3.7138975, 40.4256762]
        assert_close_enough(x_y, self.fixture_points['Plaza España, Barcelona'])


class TestBulkStreetFunctions(TestStreetFunctionsSetUp):
    """Integration tests for ``cdb_bulk_geocode_street_point``.

    Queries are sent through the inherited ``_run_authenticated`` helper and
    results are compared against the fixtures selected in ``setUp``.
    """

    def test_full_spec(self):
        """Street, city, state and country columns are all provided."""
        query = "select cartodb_id, st_x(the_geom), st_y(the_geom) " \
                "FROM cdb_dataservices_client.cdb_bulk_geocode_street_point(" \
                "'select 1 as cartodb_id, ''Spain'' as country, " \
                "''Castilla y León'' as state, ''Valladolid'' as city, " \
                "''Plaza Mayor 1'' as street " \
                "UNION " \
                "select 2 as cartodb_id, ''Spain'' as country, " \
                "''Castilla y León'' as state, ''Valladolid'' as city, " \
                "''Paseo Zorrilla'' as street' " \
                ", 'street', 'city', 'state', 'country')"
        response = self._run_authenticated(query)

        points_by_cartodb_id = {
            1: self.fixture_points['Plaza Mayor 1, Valladolid'],
            2: self.fixture_points['Paseo Zorrilla, Valladolid']
        }
        self.assert_close_points(self._x_y_by_cartodb_id(response),
                                 points_by_cartodb_id)

    def test_empty_columns(self):
        """City, state and country are passed as empty-string literals."""
        query = "select *, st_x(the_geom), st_y(the_geom) " \
                "FROM cdb_dataservices_client.cdb_bulk_geocode_street_point( " \
                "'select * from jsonb_to_recordset(''[" \
                "{\"cartodb_id\": 1, \"address\": \"1901 amphitheatre parkway, mountain view, ca, us\"}" \
                "]''::jsonb) as (cartodb_id integer, address text)', " \
                "'address', '''''', '''''', '''''')"
        response = self._run_authenticated(query)

        assert_close_enough(self._x_y_by_cartodb_id(response)[1],
                            self.fixture_points['1901 amphitheatre parkway'])

    def test_null_columns(self):
        """Only the street column is passed; the other arguments are omitted."""
        query = "select *, st_x(the_geom), st_y(the_geom) " \
                "FROM cdb_dataservices_client.cdb_bulk_geocode_street_point( " \
                "'select * from jsonb_to_recordset(''[" \
                "{\"cartodb_id\": 1, \"address\": \"1901 amphitheatre parkway, mountain view, ca, us\"}" \
                "]''::jsonb) as (cartodb_id integer, address text)', " \
                "'address')"
        response = self._run_authenticated(query)

        assert_close_enough(self._x_y_by_cartodb_id(response)[1],
                            self.fixture_points['1901 amphitheatre parkway'])

    def _assert_amphitheatre_batch(self, batch_size):
        """Geocode three known addresses with ``batch_size`` and check them."""
        query = "select *, st_x(the_geom), st_y(the_geom) " \
                "FROM cdb_dataservices_client.cdb_bulk_geocode_street_point( " \
                "'select * from jsonb_to_recordset(''[" \
                "{\"cartodb_id\": 1, \"address\": \"1900 amphitheatre parkway, mountain view, ca, us\"}," \
                "{\"cartodb_id\": 2, \"address\": \"1901 amphitheatre parkway, mountain view, ca, us\"}," \
                "{\"cartodb_id\": 3, \"address\": \"1902 amphitheatre parkway, mountain view, ca, us\"}" \
                "]''::jsonb) as (cartodb_id integer, address text)', " \
                "'address', null, null, null, %d)" % batch_size
        response = self._run_authenticated(query)

        points_by_cartodb_id = {
            1: self.fixture_points['1900 amphitheatre parkway'],
            2: self.fixture_points['1901 amphitheatre parkway'],
            3: self.fixture_points['1902 amphitheatre parkway'],
        }
        self.assert_close_points(self._x_y_by_cartodb_id(response),
                                 points_by_cartodb_id)

    def test_batching(self):
        """Three rows with a batch size of 2."""
        self._assert_amphitheatre_batch(2)

    def test_batch_size_1(self):
        """Three rows with a batch size of 1."""
        self._assert_amphitheatre_batch(1)

    def test_city_column_geocoding(self):
        """A city column alone can be used as the street expression."""
        query = "select *, st_x(the_geom), st_y(the_geom) " \
                "FROM cdb_dataservices_client.cdb_bulk_geocode_street_point( " \
                "'select * from jsonb_to_recordset(''[" \
                "{\"cartodb_id\": 1, \"city\": \"Valladolid\"}," \
                "{\"cartodb_id\": 2, \"city\": \"Madrid\"}" \
                "]''::jsonb) as (cartodb_id integer, city text)', " \
                "'city')"
        response = self._run_authenticated(query)

        assert_equal(response['total_rows'], 2)
        points_by_cartodb_id = {
            1: self.fixture_points['Valladolid'],
            2: self.fixture_points['Madrid']
        }
        self.assert_close_points(self._x_y_by_cartodb_id(response),
                                 points_by_cartodb_id)

    def test_free_text_geocoding(self):
        """A quoted literal as street expression is geocoded instead of the row data."""
        query = "select *, st_x(the_geom), st_y(the_geom) " \
                "FROM cdb_dataservices_client.cdb_bulk_geocode_street_point( " \
                "'select * from (" \
                "select 1 as cartodb_id, ''W 26th Street'' as address, " \
                "null as city , null as state , null as country" \
                ")_x', " \
                "'''Logroño, La Rioja, Spain''')"
        response = self._run_authenticated(query)

        assert_close_enough(self._x_y_by_cartodb_id(response)[1],
                            self.fixture_points['Logroño, Spain'])

    def test_templating_geocoding(self):
        """The street expression may concatenate a column with a literal."""
        query = "SELECT cartodb_id, st_x(the_geom), st_y(the_geom) from " \
                "cdb_dataservices_client.cdb_bulk_geocode_street_point(" \
                "'select 1 as cartodb_id, ''Logroño'' as city', " \
                "'city || '', '' || ''Spain''') " \
                "UNION " \
                "SELECT cartodb_id, st_x(the_geom), st_y(the_geom) from " \
                "cdb_dataservices_client.cdb_bulk_geocode_street_point(" \
                "'select 2 as cartodb_id, ''Logroño'' as city', " \
                "'city || '', '' || ''Argentina''')"
        response = self._run_authenticated(query)

        points_by_cartodb_id = {
            1: self.fixture_points['Logroño, Spain'],
            2: self.fixture_points['Logroño, Argentina']
        }
        self.assert_close_points(self._x_y_by_cartodb_id(response),
                                 points_by_cartodb_id)

    def test_template_with_two_columns_geocoding(self):
        """The street expression may concatenate two columns."""
        query = "SELECT cartodb_id, st_x(the_geom), st_y(the_geom) from " \
                "cdb_dataservices_client.cdb_bulk_geocode_street_point(" \
                " 'select * from (' ||" \
                " ' select 1 as cartodb_id, ''Valladolid'' as city, ''Mexico'' as country ' ||" \
                " ' union all ' ||" \
                " ' select 2, ''Valladolid'', ''Spain''' ||" \
                " ') _x'," \
                "'city || '', '' || country')"
        response = self._run_authenticated(query)

        points_by_cartodb_id = {
            1: self.fixture_points['Valladolid, Mexico'],
            2: self.fixture_points['Valladolid, Spain']
        }
        self.assert_close_points(self._x_y_by_cartodb_id(response),
                                 points_by_cartodb_id)

    def test_large_batches(self):
        """
        Useful just to test a good batch size
        """
        n = 110
        first_cartodb_id = -1
        first_street_number = 1
        batch_size = 'NULL'  # NULL for optimal
        streets = [
            '{{"cartodb_id": {}, "address": "{} Yonge Street, '
            'Toronto, Canada"}}'.format(first_cartodb_id + i,
                                        first_street_number + i)
            for i in range(n)
        ]

        # Read the quota before geocoding so the delta can be checked below.
        used_quota = self._used_quota()

        query = "select *, st_x(the_geom), st_y(the_geom) " \
                "FROM cdb_dataservices_client.cdb_bulk_geocode_street_point( " \
                "'select * from jsonb_to_recordset(''[" \
                "{}" \
                "]''::jsonb) as (cartodb_id integer, address text)', " \
                "'address', null, null, null, {})".format(','.join(streets),
                                                          batch_size)
        response = self._run_authenticated(query)

        assert_equal(n, len(response['rows']))
        for row in response['rows']:
            assert_not_equal(row['st_x'], None)
            assert_not_equal(row['metadata'], {})
            metadata = row['metadata']
            assert_not_equal(metadata['relevance'], None)
            assert_not_equal(metadata['precision'], None)
            assert_not_equal(metadata['match_types'], None)

        # Each geocoded row is expected to consume one unit of quota.
        assert_equal(self._used_quota(), used_quota + n)

    def test_missing_components_on_private_function(self):
        """The private function accepts items carrying only id and address."""
        query = "SELECT _cdb_bulk_geocode_street_point(" \
                " '[{\"id\": \"1\", \"address\": \"Amphitheatre Parkway 22\"}]' " \
                ")"
        response = self._run_authenticated(query)
        assert_equal(1, len(response['rows']))

    def test_semicolon(self):
        """Semicolon- and comma-separated addresses geocode to the same point."""
        query = "select *, st_x(the_geom), st_y(the_geom) " \
                "FROM cdb_dataservices_client.cdb_bulk_geocode_street_point( " \
                "'select * from jsonb_to_recordset(''[" \
                "{\"cartodb_id\": 1, \"address\": \"1900 amphitheatre parkway; mountain view; ca; us\"}," \
                "{\"cartodb_id\": 2, \"address\": \"1900 amphitheatre parkway, mountain view, ca, us\"}" \
                "]''::jsonb) as (cartodb_id integer, address text)', " \
                "'address', null, null, null)"
        response = self._run_authenticated(query)

        x_y_by_cartodb_id = self._x_y_by_cartodb_id(response)
        assert_equal(x_y_by_cartodb_id[1], x_y_by_cartodb_id[2])

    def test_component_aggregation(self):
        """Street, city and country columns resolve to the expected point."""
        query = "select cartodb_id, st_x(the_geom), st_y(the_geom) " \
                "FROM cdb_dataservices_client.cdb_bulk_geocode_street_point(" \
                "'select 1 as cartodb_id, ''Spain'' as country, " \
                "''Barcelona'' as city, " \
                "''Plaza España'' as street' " \
                ", 'street', 'city', NULL, 'country')"
        response = self._run_authenticated(query)

        assert_close_enough(self._x_y_by_cartodb_id(response)[1],
                            self.fixture_points['Plaza España, Barcelona'])

    def _test_known_table(self):
        """Geocode rows of an existing table and check every row comes back.

        The leading underscore keeps unittest from collecting this method.
        NOTE(review): presumably disabled because it needs a table that is
        not part of the fixtures (the query reads ``unknown_table``) -
        confirm before re-enabling.
        """
        subquery = 'select * from unknown_table where cartodb_id < 1100'
        subquery_count = 'select count(1) from ({}) _x'.format(subquery)
        count = self._run_authenticated(subquery_count)['rows'][0]['count']

        query = "select cartodb_id, st_x(the_geom), st_y(the_geom) " \
                "FROM cdb_dataservices_client.cdb_bulk_geocode_street_point(" \
                "'{}' " \
                ", 'street', 'city', NULL, 'country')".format(subquery)
        response = self._run_authenticated(query)
        assert_equal(len(response['rows']), count)
        assert_not_equal(response['rows'][0]['st_x'], None)

    def test_metadata(self):
        """Returned metadata matches the provider's expectations.

        Rows are matched to expectations by ``cartodb_id``: the input is a
        ``UNION`` with no ``ORDER BY``, so row order is not guaranteed.
        """
        query = "select cartodb_id, metadata " \
                "FROM cdb_dataservices_client.cdb_bulk_geocode_street_point(" \
                "'select 1 as cartodb_id, ''Spain'' as country, " \
                "''Barcelona'' as city, " \
                "''Plaza España'' as street " \
                "UNION " \
                "select 2 as cartodb_id, ''Spain'' as country, " \
                "''Valladolid'' as city, " \
                "''Santiago Rusiñol 123'' as street' " \
                ", 'street', 'city', NULL, 'country')"
        response = self._run_authenticated(query)

        expected_by_cartodb_id = {
            1: self.metadata['Plaza España, Barcelona'],
            2: self.metadata['Santiago Rusiñol 123, Valladolid']
        }
        metadata_by_cartodb_id = {r['cartodb_id']: r['metadata']
                                  for r in response['rows']}
        # Compare against the raw row count so duplicated ids are caught too.
        assert_equal(len(response['rows']), len(expected_by_cartodb_id))
        for cartodb_id, expected in expected_by_cartodb_id.items():
            self.assert_metadata(metadata_by_cartodb_id[cartodb_id], expected)

    @staticmethod
    def _x_y_by_cartodb_id(response):
        """Map each row's ``cartodb_id`` to its ``[st_x, st_y]`` pair."""
        return {r['cartodb_id']: [r['st_x'], r['st_y']]
                for r in response['rows']}

    @staticmethod
    def assert_close_points(points_a_by_cartodb_id, points_b_by_cartodb_id):
        """Assert both mappings have the same size and pairwise-close points.

        Raises ``KeyError`` if an id of the first mapping is missing from the
        second one.
        """
        assert_equal(len(points_a_by_cartodb_id), len(points_b_by_cartodb_id))
        # ``items()`` instead of ``iteritems()`` so this also runs on Python 3.
        for cartodb_id, point in points_a_by_cartodb_id.items():
            assert_close_enough(point, points_b_by_cartodb_id[cartodb_id])

    @staticmethod
    def assert_metadata(metadata, expected):
        """Assert ``metadata`` matches ``expected``.

        ``relevance`` is compared through ``isclose`` with 0.02 as third
        argument; ``precision`` and ``match_types`` must be equal.
        """
        relevance = metadata['relevance']
        expected_relevance = expected['relevance']
        assert_true(isclose(relevance, expected_relevance, 0.02),
                    '{} not close to {}'.format(relevance, expected_relevance))

        assert_equal(metadata['precision'], expected['precision'])
        assert_equal(metadata['match_types'], expected['match_types'])