dataservices-api/test/integration/test_street_functions.py

427 lines
19 KiB
Python
Raw Normal View History

#!/usr/local/bin/python
# -*- coding: utf-8 -*-
from unittest import TestCase
2018-07-11 02:30:01 +08:00
from nose.tools import assert_not_equal, assert_equal, assert_true
from ..helpers.integration_test_helper import IntegrationTestHelper
2018-07-11 02:30:01 +08:00
from ..helpers.integration_test_helper import assert_close_enough, isclose
class TestStreetFunctionsSetUp(TestCase):
    """Shared fixtures and helpers for the street geocoding integration tests.

    Holds, per geocoding provider, the expected ``[st_x, st_y]`` pairs and the
    expected result metadata for a set of known addresses. ``setUp`` asks the
    server which provider backs the ``hires_geocoder`` service and selects the
    matching fixtures.
    """

    # Provider name reported by cdb_service_quota_info(); filled in by setUp.
    provider = None
    # Expected points for the active provider; filled in by setUp.
    fixture_points = None
    # Expected metadata for the active provider; filled in by setUp.
    metadata = None

    GOOGLE_POINTS = {
        'Plaza Mayor, Valladolid': [-4.728252, 41.6517025],
        'Paseo Zorrilla, Valladolid': [-4.7404453, 41.6314339],
        '1900 amphitheatre parkway': [-122.0875324, 37.4227968],
        '1901 amphitheatre parkway': [-122.0885504, 37.4238657],
        '1902 amphitheatre parkway': [-122.0876674, 37.4235729],
        'Valladolid': [-4.7245321, 41.652251],
        'Valladolid, Spain': [-4.7245321, 41.652251],
        'Valladolid, Mexico': [-88.2022488, 20.68964],
        'Madrid': [-3.7037902, 40.4167754],
        'Logroño, Spain': [-2.4449852, 42.4627195],
        'Logroño, Argentina': [-61.6961807, -29.5031057],
        'Plaza España, Barcelona': [2.1482563, 41.375485]
    }

    HERE_POINTS = {
        'Plaza Mayor, Valladolid': [-4.72979, 41.65258],
        'Paseo Zorrilla, Valladolid': [-4.73869, 41.63817],
        '1900 amphitheatre parkway': [-122.0879468, 37.4234763],
        '1901 amphitheatre parkway': [-122.0879253, 37.4238725],
        '1902 amphitheatre parkway': [-122.0879531, 37.4234775],
        'Valladolid': [-4.73214, 41.6542],
        'Valladolid, Spain': [-4.73214, 41.6542],
        'Valladolid, Mexico': [-88.20117, 20.69021],
        'Madrid': [-3.70578, 40.42028],
        'Logroño, Spain': [-2.45194, 42.46592],
        'Logroño, Argentina': [-61.69604, -29.50425],
        'Plaza España, Barcelona': [2.14834, 41.37494]
    }

    # TomTom fixtures start from the HERE values and override the entries
    # listed below.
    TOMTOM_POINTS = HERE_POINTS.copy()
    TOMTOM_POINTS.update({
        'Plaza Mayor, Valladolid': [-4.72183, 41.5826],
        'Paseo Zorrilla, Valladolid': [-4.74031, 41.63181],
        'Valladolid': [-4.72838, 41.6542],
        'Valladolid, Spain': [-4.72838, 41.6542],
        'Madrid': [-3.70035, 40.42028],
        'Logroño, Spain': [-2.44998, 42.46592],
        'Plaza España, Barcelona': [2.1497, 41.37516]
    })

    # Mapbox fixtures start from the Google values and override the entries
    # listed below.
    MAPBOX_POINTS = GOOGLE_POINTS.copy()
    MAPBOX_POINTS.update({
        'Logroño, Spain': [-2.44556, 42.47],
        'Logroño, Argentina': [-70.687195, -33.470901],  # TODO: huge mismatch
        'Valladolid': [-4.72856, 41.652251],
        'Valladolid, Spain': [-4.72856, 41.652251],
        '1902 amphitheatre parkway': [-118.03, 34.06],  # TODO: huge mismatch
        'Madrid': [-3.69194, 40.4167754],
        'Plaza España, Barcelona': [2.342231, 41.50677]  # TODO: not ideal
    })

    # Provider name (as returned by the quota info query) -> expected points.
    FIXTURE_POINTS = {
        'google': GOOGLE_POINTS,
        'heremaps': HERE_POINTS,
        'tomtom': TOMTOM_POINTS,
        'mapbox': MAPBOX_POINTS
    }

    GOOGLE_METADATAS = {
        'Plaza España, Barcelona':
            {'relevance': 0.9, 'precision': 'precise', 'match_types': ['point_of_interest']},
        'Santiago Rusiñol 123, Valladolid':
            {'relevance': 0.56, 'precision': 'interpolated', 'match_types': ['locality']}
    }

    HERE_METADATAS = {
        'Plaza España, Barcelona':
            {'relevance': 1, 'precision': 'precise', 'match_types': ['street']},
        'Santiago Rusiñol 123, Valladolid':
            {'relevance': 0.89, 'precision': 'precise', 'match_types': ['street']}  # Wrong. See https://stackoverflow.com/questions/51285622/missing-matchtype-at-here-geocoding-responses
    }

    TOMTOM_METADATAS = {
        'Plaza España, Barcelona':
            {'relevance': 0.85, 'precision': 'precise', 'match_types': ['street']},
        'Santiago Rusiñol 123, Valladolid':
            {'relevance': 0.45, 'precision': 'interpolated', 'match_types': ['street']}
    }

    MAPBOX_METADATAS = {
        'Plaza España, Barcelona':
            {'relevance': 0.67, 'precision': 'precise', 'match_types': ['point_of_interest']},
        'Santiago Rusiñol 123, Valladolid':
            {'relevance': 0.67, 'precision': 'precise', 'match_types': ['point_of_interest']}  # TODO: wrong
    }

    # Provider name (as returned by the quota info query) -> expected metadata.
    METADATAS = {
        'google': GOOGLE_METADATAS,
        'heremaps': HERE_METADATAS,
        'tomtom': TOMTOM_METADATAS,
        'mapbox': MAPBOX_METADATAS
    }

    def setUp(self):
        """Load the environment, build the SQL API URL and pick the fixtures.

        The provider is looked up with an authenticated query against
        ``cdb_service_quota_info()`` unless ``fixture_points`` is already set.
        """
        self.env_variables = IntegrationTestHelper.get_environment_variables()
        # The API key is not part of the URL; it is appended to each query by
        # _run_authenticated.
        self.sql_api_url = "{0}://{1}.{2}/api/v1/sql".format(
            self.env_variables['schema'],
            self.env_variables['username'],
            self.env_variables['host']
        )

        if not self.fixture_points:
            query = "select provider from " \
                    "cdb_dataservices_client.cdb_service_quota_info() " \
                    "where service = 'hires_geocoder'"
            response = self._run_authenticated(query)
            self.provider = response['rows'][0]['provider']
            self.fixture_points = self.FIXTURE_POINTS[self.provider]
            self.metadata = self.METADATAS[self.provider]

    def _run_authenticated(self, query):
        """Run ``query`` with the configured API key appended.

        Returns whatever ``IntegrationTestHelper.execute_query_raw`` returns;
        callers in this file read its ``'rows'`` entry.
        """
        authenticated_query = "{}&api_key={}".format(
            query, self.env_variables['api_key'])
        return IntegrationTestHelper.execute_query_raw(self.sql_api_url,
                                                       authenticated_query)

    def _used_quota(self):
        """Return the ``used_quota`` value reported for ``hires_geocoder``."""
        query = "select used_quota " \
                "from cdb_dataservices_client.cdb_service_quota_info() " \
                "where service = 'hires_geocoder'"
        return self._run_authenticated(query)['rows'][0]['used_quota']
2018-07-10 21:17:14 +08:00
class TestStreetFunctions(TestStreetFunctionsSetUp):
    """Integration tests for ``cdb_geocode_street_point``."""

    def test_if_select_with_street_point_is_ok(self):
        """Geocoding the ``street`` column of the test table yields a geometry."""
        query = "SELECT cdb_dataservices_client.cdb_geocode_street_point(street) " \
                "as geometry FROM {0} LIMIT 1&api_key={1}".format(
                    self.env_variables['table_name'],
                    self.env_variables['api_key'])
        geometry = IntegrationTestHelper.execute_query(self.sql_api_url, query)
        assert_not_equal(geometry['geometry'], None)

    def test_if_select_with_street_without_api_key_raise_error(self):
        """The same query without an API key must fail with a permission error."""
        table = self.env_variables['table_name']
        query = "SELECT cdb_dataservices_client.cdb_geocode_street_point(street) " \
                "as geometry FROM {0} LIMIT 1".format(table)

        # assertRaises makes the test fail when no error is raised at all,
        # instead of silently passing.
        with self.assertRaises(Exception) as raised:
            IntegrationTestHelper.execute_query(self.sql_api_url, query)

        error = raised.exception
        # ``message`` only exists on Python 2 exceptions; for an exception
        # built from a single argument, ``args[0]`` is the same payload.
        message = getattr(error, 'message', None) or error.args[0]
        assert_equal(message[0],
                     "permission denied for relation {}".format(table))

    def test_component_aggregation(self):
        """Street, city and country passed separately resolve to Barcelona."""
        query = "select st_x(the_geom), st_y(the_geom) from (" \
                "select cdb_dataservices_client.cdb_geocode_street_point( " \
                "'Plaza España', 'Barcelona', null, 'Spain') as the_geom) _x"
        response = self._run_authenticated(query)
        row = response['rows'][0]
        x_y = [row['st_x'], row['st_y']]
        # Wrong coordinates (Plaza España, Madrid): [-3.7138975, 40.4256762]
        assert_close_enough(x_y, self.fixture_points['Plaza España, Barcelona'])
2018-07-10 21:17:14 +08:00
class TestBulkStreetFunctions(TestStreetFunctionsSetUp):
    """Integration tests for ``cdb_bulk_geocode_street_point``.

    Each test sends one SQL statement through ``_run_authenticated`` and
    compares the returned points or metadata with the fixtures selected for
    the active provider.
    """

    def test_full_spec(self):
        """Street, city, state and country columns are all supplied."""
        query = "select cartodb_id, st_x(the_geom), st_y(the_geom) " \
                "FROM cdb_dataservices_client.cdb_bulk_geocode_street_point(" \
                "'select 1 as cartodb_id, ''Spain'' as country, " \
                "''Castilla y León'' as state, ''Valladolid'' as city, " \
                "''Plaza Mayor'' as street " \
                "UNION " \
                "select 2 as cartodb_id, ''Spain'' as country, " \
                "''Castilla y León'' as state, ''Valladolid'' as city, " \
                "''Paseo Zorrilla'' as street' " \
                ", 'street', 'city', 'state', 'country')"
        response = self._run_authenticated(query)

        points_by_cartodb_id = {
            1: self.fixture_points['Plaza Mayor, Valladolid'],
            2: self.fixture_points['Paseo Zorrilla, Valladolid']
        }
        self.assert_close_points(self._x_y_by_cartodb_id(response), points_by_cartodb_id)

    def test_empty_columns(self):
        """City, state and country are passed as empty-string expressions."""
        query = "select *, st_x(the_geom), st_y(the_geom) " \
                "FROM cdb_dataservices_client.cdb_bulk_geocode_street_point( " \
                "'select * from jsonb_to_recordset(''[" \
                "{\"cartodb_id\": 1, \"address\": \"1901 amphitheatre parkway, mountain view, ca, us\"}" \
                "]''::jsonb) as (cartodb_id integer, address text)', " \
                "'address', '''''', '''''', '''''')"
        response = self._run_authenticated(query)

        assert_close_enough(self._x_y_by_cartodb_id(response)[1],
                            self.fixture_points['1901 amphitheatre parkway'])

    def test_null_columns(self):
        """Only the street expression is passed; the other arguments are omitted."""
        query = "select *, st_x(the_geom), st_y(the_geom) " \
                "FROM cdb_dataservices_client.cdb_bulk_geocode_street_point( " \
                "'select * from jsonb_to_recordset(''[" \
                "{\"cartodb_id\": 1, \"address\": \"1901 amphitheatre parkway, mountain view, ca, us\"}" \
                "]''::jsonb) as (cartodb_id integer, address text)', " \
                "'address')"
        response = self._run_authenticated(query)

        assert_close_enough(self._x_y_by_cartodb_id(response)[1],
                            self.fixture_points['1901 amphitheatre parkway'])

    def test_batching(self):
        """Three addresses are geocoded with a batch size argument of 2."""
        query = "select *, st_x(the_geom), st_y(the_geom) " \
                "FROM cdb_dataservices_client.cdb_bulk_geocode_street_point( " \
                "'select * from jsonb_to_recordset(''[" \
                "{\"cartodb_id\": 1, \"address\": \"1900 amphitheatre parkway, mountain view, ca, us\"}," \
                "{\"cartodb_id\": 2, \"address\": \"1901 amphitheatre parkway, mountain view, ca, us\"}," \
                "{\"cartodb_id\": 3, \"address\": \"1902 amphitheatre parkway, mountain view, ca, us\"}" \
                "]''::jsonb) as (cartodb_id integer, address text)', " \
                "'address', null, null, null, 2)"
        response = self._run_authenticated(query)

        points_by_cartodb_id = {
            1: self.fixture_points['1900 amphitheatre parkway'],
            2: self.fixture_points['1901 amphitheatre parkway'],
            3: self.fixture_points['1902 amphitheatre parkway'],
        }
        self.assert_close_points(self._x_y_by_cartodb_id(response), points_by_cartodb_id)

    def test_city_column_geocoding(self):
        """A city column used as the only search expression is geocoded."""
        query = "select *, st_x(the_geom), st_y(the_geom) " \
                "FROM cdb_dataservices_client.cdb_bulk_geocode_street_point( " \
                "'select * from jsonb_to_recordset(''[" \
                "{\"cartodb_id\": 1, \"city\": \"Valladolid\"}," \
                "{\"cartodb_id\": 2, \"city\": \"Madrid\"}" \
                "]''::jsonb) as (cartodb_id integer, city text)', " \
                "'city')"
        response = self._run_authenticated(query)

        assert_equal(response['total_rows'], 2)

        points_by_cartodb_id = {
            1: self.fixture_points['Valladolid'],
            2: self.fixture_points['Madrid']
        }
        self.assert_close_points(self._x_y_by_cartodb_id(response), points_by_cartodb_id)

    def test_free_text_geocoding(self):
        """A quoted literal is used as the search expression instead of a column."""
        query = "select *, st_x(the_geom), st_y(the_geom) " \
                "FROM cdb_dataservices_client.cdb_bulk_geocode_street_point( " \
                "'select * from (" \
                "select 1 as cartodb_id, ''W 26th Street'' as address, " \
                "null as city , null as state , null as country" \
                ")_x', " \
                "'''Logroño, La Rioja, Spain''')"
        response = self._run_authenticated(query)

        assert_close_enough(self._x_y_by_cartodb_id(response)[1],
                            self.fixture_points['Logroño, Spain'])

    def test_templating_geocoding(self):
        """A column concatenated with a literal country works as a template."""
        query = "SELECT cartodb_id, st_x(the_geom), st_y(the_geom) from " \
                "cdb_dataservices_client.cdb_bulk_geocode_street_point(" \
                "'select 1 as cartodb_id, ''Logroño'' as city', " \
                "'city || '', '' || ''Spain''') " \
                "UNION " \
                "SELECT cartodb_id, st_x(the_geom), st_y(the_geom) from " \
                "cdb_dataservices_client.cdb_bulk_geocode_street_point(" \
                "'select 2 as cartodb_id, ''Logroño'' as city', " \
                "'city || '', '' || ''Argentina''')"
        response = self._run_authenticated(query)

        points_by_cartodb_id = {
            1: self.fixture_points['Logroño, Spain'],
            2: self.fixture_points['Logroño, Argentina']
        }
        self.assert_close_points(self._x_y_by_cartodb_id(response), points_by_cartodb_id)

    def test_template_with_two_columns_geocoding(self):
        """Two columns concatenated in the search expression are geocoded."""
        query = "SELECT cartodb_id, st_x(the_geom), st_y(the_geom) from " \
                "cdb_dataservices_client.cdb_bulk_geocode_street_point(" \
                " 'select * from (' ||" \
                " ' select 1 as cartodb_id, ''Valladolid'' as city, ''Mexico'' as country ' ||" \
                " ' union all ' ||" \
                " ' select 2, ''Valladolid'', ''Spain''' ||" \
                " ') _x'," \
                "'city || '', '' || country')"
        response = self._run_authenticated(query)

        points_by_cartodb_id = {
            1: self.fixture_points['Valladolid, Mexico'],
            2: self.fixture_points['Valladolid, Spain']
        }
        self.assert_close_points(self._x_y_by_cartodb_id(response), points_by_cartodb_id)

    def test_large_batches(self):
        """
        Useful just to test a good batch size
        """
        n = 110
        first_cartodb_id = -1
        first_street_number = 1
        batch_size = 'NULL'  # NULL for optimal

        street_template = '{{"cartodb_id": {}, "address": "{} Yonge Street, ' \
                          'Toronto, Canada"}}'
        streets = [street_template.format(first_cartodb_id + i,
                                          first_street_number + i)
                   for i in range(n)]

        # Read the quota before geocoding so the increase can be checked below.
        used_quota = self._used_quota()

        query = "select *, st_x(the_geom), st_y(the_geom) " \
                "FROM cdb_dataservices_client.cdb_bulk_geocode_street_point( " \
                "'select * from jsonb_to_recordset(''[" \
                "{}" \
                "]''::jsonb) as (cartodb_id integer, address text)', " \
                "'address', null, null, null, {})".format(','.join(streets), batch_size)
        response = self._run_authenticated(query)

        assert_equal(n, len(response['rows']))
        for row in response['rows']:
            assert_not_equal(row['st_x'], None)
            assert_not_equal(row['metadata'], {})
            metadata = row['metadata']
            assert_not_equal(metadata['relevance'], None)
            assert_not_equal(metadata['precision'], None)
            assert_not_equal(metadata['match_types'], None)

        # Every geocoded row is expected to count against the quota.
        assert_equal(self._used_quota(), used_quota + n)

    def test_missing_components_on_private_function(self):
        """The private function accepts a search that only carries an address."""
        query = "SELECT _cdb_bulk_geocode_street_point(" \
                " '[{\"id\": \"1\", \"address\": \"Amphitheatre Parkway 22\"}]' " \
                ")"
        response = self._run_authenticated(query)

        assert_equal(1, len(response['rows']))

    def test_semicolon(self):
        """The same address written with ';' or ',' separators gives one point."""
        query = "select *, st_x(the_geom), st_y(the_geom) " \
                "FROM cdb_dataservices_client.cdb_bulk_geocode_street_point( " \
                "'select * from jsonb_to_recordset(''[" \
                "{\"cartodb_id\": 1, \"address\": \"1900 amphitheatre parkway; mountain view; ca; us\"}," \
                "{\"cartodb_id\": 2, \"address\": \"1900 amphitheatre parkway, mountain view, ca, us\"}" \
                "]''::jsonb) as (cartodb_id integer, address text)', " \
                "'address', null, null, null)"
        response = self._run_authenticated(query)

        x_y_by_cartodb_id = self._x_y_by_cartodb_id(response)
        assert_equal(x_y_by_cartodb_id[1], x_y_by_cartodb_id[2])

    def test_component_aggregation(self):
        """Street, city and country columns together resolve to Barcelona."""
        query = "select cartodb_id, st_x(the_geom), st_y(the_geom) " \
                "FROM cdb_dataservices_client.cdb_bulk_geocode_street_point(" \
                "'select 1 as cartodb_id, ''Spain'' as country, " \
                "''Barcelona'' as city, " \
                "''Plaza España'' as street' " \
                ", 'street', 'city', NULL, 'country')"
        response = self._run_authenticated(query)

        assert_close_enough(self._x_y_by_cartodb_id(response)[1],
                            self.fixture_points['Plaza España, Barcelona'])

    def _test_known_table(self):
        """Geocode the rows of an existing table and check the row count.

        NOTE(review): the leading underscore appears to keep this out of test
        collection, and the subquery reads from ``unknown_table`` -- confirm
        the intended table before enabling it.
        """
        subquery = 'select * from unknown_table where cartodb_id < 1100'
        subquery_count = 'select count(1) from ({}) _x'.format(subquery)
        count = self._run_authenticated(subquery_count)['rows'][0]['count']

        query = "select cartodb_id, st_x(the_geom), st_y(the_geom) " \
                "FROM cdb_dataservices_client.cdb_bulk_geocode_street_point(" \
                "'{}' " \
                ", 'street', 'city', NULL, 'country')".format(subquery)
        response = self._run_authenticated(query)

        assert_equal(len(response['rows']), count)
        assert_not_equal(response['rows'][0]['st_x'], None)

    def test_metadata(self):
        """Each geocoded row carries the metadata expected for the provider."""
        # cartodb_id is selected so each row is checked against its own
        # expectation; the inner UNION gives no ordering guarantee.
        query = "select cartodb_id, metadata " \
                "FROM cdb_dataservices_client.cdb_bulk_geocode_street_point(" \
                "'select 1 as cartodb_id, ''Spain'' as country, " \
                "''Barcelona'' as city, " \
                "''Plaza España'' as street " \
                "UNION " \
                "select 2 as cartodb_id, ''Spain'' as country, " \
                "''Valladolid'' as city, " \
                "''Santiago Rusiñol 123'' as street' " \
                ", 'street', 'city', NULL, 'country')"
        response = self._run_authenticated(query)

        expected_by_cartodb_id = {
            1: self.metadata['Plaza España, Barcelona'],
            2: self.metadata['Santiago Rusiñol 123, Valladolid']
        }
        rows = response['rows']
        assert_equal(len(rows), len(expected_by_cartodb_id))
        for row in rows:
            self.assert_metadata(row['metadata'],
                                 expected_by_cartodb_id[row['cartodb_id']])

    @staticmethod
    def _x_y_by_cartodb_id(response):
        """Map each row's ``cartodb_id`` to its ``[st_x, st_y]`` pair."""
        return {r['cartodb_id']: [r['st_x'], r['st_y']]
                for r in response['rows']}

    @staticmethod
    def assert_close_points(points_a_by_cartodb_id, points_b_by_cartodb_id):
        """Assert both mappings have the same size and pairwise close points.

        Points are looked up in ``points_b_by_cartodb_id`` by the ids of
        ``points_a_by_cartodb_id``, so an id missing there raises ``KeyError``.
        """
        assert_equal(len(points_a_by_cartodb_id), len(points_b_by_cartodb_id))
        # items() instead of iteritems(): works on both Python 2 and 3.
        for cartodb_id, point in points_a_by_cartodb_id.items():
            assert_close_enough(point, points_b_by_cartodb_id[cartodb_id])

    @staticmethod
    def assert_metadata(metadata, expected):
        """Compare one metadata dict with its expectation.

        ``relevance`` is compared through ``isclose`` with a 0.02 tolerance
        argument; ``precision`` and ``match_types`` must be equal.
        """
        relevance = metadata['relevance']
        expected_relevance = expected['relevance']
        assert_true(isclose(relevance, expected_relevance, 0.02),
                    '{} not close to {}'.format(relevance, expected_relevance))

        assert_equal(metadata['precision'], expected['precision'])
        assert_equal(metadata['match_types'], expected['match_types'])