From 825e3b7ee82080b4998849d85bb7a76d8e1c2ab8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Ignacio=20S=C3=A1nchez=20Lara?= Date: Wed, 11 Jul 2018 07:42:21 +0200 Subject: [PATCH] Relevance metadata for Mapbox --- .../cartodb_services/mapbox/bulk_geocoder.py | 19 +++----- .../cartodb_services/mapbox/geocoder.py | 46 +++++++++++++------ test/integration/test_street_functions.py | 40 +++++++++++----- 3 files changed, 68 insertions(+), 37 deletions(-) diff --git a/server/lib/python/cartodb_services/cartodb_services/mapbox/bulk_geocoder.py b/server/lib/python/cartodb_services/cartodb_services/mapbox/bulk_geocoder.py index 63f5e72..3ddd457 100644 --- a/server/lib/python/cartodb_services/cartodb_services/mapbox/bulk_geocoder.py +++ b/server/lib/python/cartodb_services/cartodb_services/mapbox/bulk_geocoder.py @@ -29,15 +29,10 @@ class MapboxBulkGeocoder(MapboxGeocoder, StreetPointBulkGeocoder): for search in searches: elements = self._encoded_elements(search) self._logger.debug('--> Sending serial search: {}'.format(search)) - coordinates = self._geocode_search(*elements) - results.append((search[0], coordinates, [])) - return results + result = self.geocode_meta(*elements) - def _geocode_search(self, address, city, state, country): - coordinates = self.geocode(searchtext=address, city=city, - state_province=state, country=country) - self._logger.debug('--> result sent') - return coordinates + results.append((search[0], result[0], result[1])) + return results def _encoded_elements(self, search): (search_id, address, city, state, country) = search @@ -58,11 +53,11 @@ class MapboxBulkGeocoder(MapboxGeocoder, StreetPointBulkGeocoder): frees.append(free) self._logger.debug('--> sending free search: {}'.format(frees)) - xy_results = self.geocode_free_text(frees) + full_results = self.geocode_free_text_meta(frees) results = [] - self._logger.debug('--> searches: {}; xy: {}'.format(searches, xy_results)) - for s, r in zip(searches, xy_results): - results.append((s[0], r, [])) + self._logger.debug('--> searches: {}; xy: {}'.format(searches, full_results)) + for s, r in zip(searches, full_results): + results.append((s[0], r[0], r[1])) self._logger.debug('--> results: {}'.format(results)) return results diff --git a/server/lib/python/cartodb_services/cartodb_services/mapbox/geocoder.py b/server/lib/python/cartodb_services/cartodb_services/mapbox/geocoder.py index a3d6f93..5d644d9 100644 --- a/server/lib/python/cartodb_services/cartodb_services/mapbox/geocoder.py +++ b/server/lib/python/cartodb_services/cartodb_services/mapbox/geocoder.py @@ -22,6 +22,8 @@ ENTRY_COORDINATES = 'coordinates' ENTRY_TYPE = 'type' TYPE_POINT = 'Point' +EMPTY_RESPONSE = [[], {}] + class MapboxGeocoder(Traceable): ''' @@ -49,12 +51,16 @@ class MapboxGeocoder(Traceable): for a_json_response in json_response: if a_json_response[ENTRY_FEATURES]: feature = a_json_response[ENTRY_FEATURES][0] - result.append(self._extract_lng_lat_from_feature(feature)) + result.append([ + self._extract_lng_lat_from_feature(feature), + self._extract_metadata_from_result(feature) + ] + ) else: - result.append([]) + result.append(EMPTY_RESPONSE) return result else: - return [] + return EMPTY_RESPONSE def _extract_lng_lat_from_feature(self, feature): geometry = feature[ENTRY_GEOMETRY] @@ -67,6 +73,14 @@ class MapboxGeocoder(Traceable): latitude = location[1] return [longitude, latitude] + def _extract_metadata_from_result(self, result): + return { + 'relevance': self._normalize_relevance(float(result['relevance'])) + } + + def _normalize_relevance(self, relevance): + return 1 if relevance == 0.99 else relevance + def _validate_input(self, searchtext, city=None, state_province=None, country=None): if searchtext and searchtext.strip(): @@ -88,8 +102,13 @@ class MapboxGeocoder(Traceable): :param country: Country ISO 3166 code :return: [x, y] on success, [] on error """ + return self.geocode_meta(searchtext, city, state_province, country)[0] + + @qps_retry(qps=10) + def geocode_meta(self, searchtext, city=None, state_province=None, + country=None): if not self._validate_input(searchtext, city, state_province, country): - return [] + return EMPTY_RESPONSE address = [] if searchtext and searchtext.strip(): @@ -99,32 +118,31 @@ class MapboxGeocoder(Traceable): if state_province: address.append(normalize(state_province)) - country = [country] if country else None - free_search = ', '.join(address) - return self.geocode_free_text([free_search], country)[0] + return self.geocode_free_text_meta([free_search], country)[0] @qps_retry(qps=10) - def geocode_free_text(self, free_searches, country=None): + def geocode_free_text_meta(self, free_searches, country=None): """ :param free_searches: Free text searches :param country: Country ISO 3166 code :return: list of [x, y] on success, [] on error """ + country = [country] if country else None + try: free_search = ';'.join([self._escape(fs) for fs in free_searches]) - self._logger.debug('--> free search: {}'.format(free_search)) + self._logger.debug('--> free search: {}, country: {}'.format(free_search, country)) response = self._geocoder.forward(address=free_search.decode('utf-8'), - country=country, - limit=1) + country=country) if response.status_code == requests.codes.ok: return self._parse_geocoder_response(response.text) elif response.status_code == requests.codes.bad_request: - return [] + return EMPTY_RESPONSE elif response.status_code == requests.codes.unprocessable_entity: - return [] + return EMPTY_RESPONSE else: raise ServiceException(response.status_code, response) except requests.Timeout as te: @@ -138,7 +156,7 @@ class MapboxGeocoder(Traceable): # Don't raise the exception to continue with the geocoding job self._logger.error('Error connecting to Mapbox geocoding server', exception=ce) - return [] + return EMPTY_RESPONSE def _escape(self, free_search): # Semicolon is used to separate batch geocoding; there's no documented diff --git a/test/integration/test_street_functions.py b/test/integration/test_street_functions.py index e842396..b0c6deb 100644 --- a/test/integration/test_street_functions.py +++ b/test/integration/test_street_functions.py @@ -22,7 +22,7 @@ class TestStreetFunctionsSetUp(TestCase): 'Madrid': [-3.7037902, 40.4167754], 'Logroño, Spain': [-2.4449852, 42.4627195], 'Logroño, Argentina': [-61.6961807, -29.5031057], - 'Plaza España 1, Barcelona': [2.1482563, 41.375485] + 'Plaza España, Barcelona': [2.1482563, 41.375485] } HERE_POINTS = { @@ -37,7 +37,7 @@ class TestStreetFunctionsSetUp(TestCase): 'Madrid': [-3.70578, 40.42028], 'Logroño, Spain': [-2.45194, 42.46592], 'Logroño, Argentina': [-61.69604, -29.50425], - 'Plaza España 1, Barcelona': [2.1735699, 41.3823] # TODO: not ideal + 'Plaza España, Barcelona': [2.1735699, 41.3823] # TODO: not ideal } TOMTOM_POINTS = HERE_POINTS.copy() @@ -48,7 +48,7 @@ class TestStreetFunctionsSetUp(TestCase): 'Valladolid, Spain': [-4.72838, 41.6542], 'Madrid': [-3.70035, 40.42028], 'Logroño, Spain': [-2.44998, 42.46592], - 'Plaza España 1, Barcelona': [2.07479, 41.36818] # TODO: not ideal + 'Plaza España, Barcelona': [2.07479, 41.36818] # TODO: not ideal }) MAPBOX_POINTS = GOOGLE_POINTS.copy() @@ -59,7 +59,7 @@ class TestStreetFunctionsSetUp(TestCase): 'Valladolid, Spain': [-4.72856, 41.652251], '1902 amphitheatre parkway': [-118.03, 34.06], # TODO: huge mismatch 'Madrid': [-3.69194, 40.4167754], - 'Plaza España 1, Barcelona': [2.245969, 41.452483] # TODO: not ideal + 'Plaza España, Barcelona': [2.245969, 41.452483] # TODO: not ideal }) FIXTURE_POINTS = { @@ -69,6 +69,21 @@ class TestStreetFunctionsSetUp(TestCase): 'mapbox': MAPBOX_POINTS } + HERE_RELEVANCES = { + 'Plaza España, Barcelona': 1 + } + + MAPBOX_RELEVANCES = { + 'Plaza España, Barcelona': 0.75 + } + + RELEVANCES = { + 'here': HERE_RELEVANCES, + 'tomtom': HERE_RELEVANCES, + 'mapbox': MAPBOX_RELEVANCES, + 'google': HERE_RELEVANCES + } + def setUp(self): self.env_variables = IntegrationTestHelper.get_environment_variables() self.sql_api_url = "{0}://{1}.{2}/api/v1/sql".format( @@ -86,6 +101,8 @@ class TestStreetFunctionsSetUp(TestCase): provider = response['rows'][0]['provider'] self.fixture_points = self.FIXTURE_POINTS[provider] + self.relevances = self.RELEVANCES[provider] + def _run_authenticated(self, query): authenticated_query = "{}&api_key={}".format(query, @@ -118,12 +135,12 @@ class TestStreetFunctions(TestStreetFunctionsSetUp): def test_component_aggregation(self): query = "select st_x(the_geom), st_y(the_geom) from (" \ "select cdb_dataservices_client.cdb_geocode_street_point( " \ - "'Plaza España 1', 'Barcelona', null, 'Spain') as the_geom) _x" + "'Plaza España', 'Barcelona', null, 'Spain') as the_geom) _x" response = self._run_authenticated(query) row = response['rows'][0] x_y = [row['st_x'], row['st_y']] # Wrong coordinates (Plaza España, Madrid): [-3.7138975, 40.4256762] - assert_close_enough(x_y, self.fixture_points['Plaza España 1, Barcelona']) + assert_close_enough(x_y, self.fixture_points['Plaza España, Barcelona']) class TestBulkStreetFunctions(TestStreetFunctionsSetUp): @@ -294,18 +311,18 @@ class TestBulkStreetFunctions(TestStreetFunctionsSetUp): x_y_by_cartodb_id = self._x_y_by_cartodb_id(response) assert_equal(x_y_by_cartodb_id[1], x_y_by_cartodb_id[2]) - # "'Plaza España 1', 'Barcelona', null, 'Spain') as the_geom) _x" + # "'Plaza España', 'Barcelona', null, 'Spain') as the_geom) _x" def test_component_aggregation(self): query = "select cartodb_id, st_x(the_geom), st_y(the_geom) " \ "FROM cdb_dataservices_client.cdb_bulk_geocode_street_point(" \ "'select 1 as cartodb_id, ''Spain'' as country, " \ "''Barcelona'' as city, " \ - "''Plaza España 1'' as street' " \ + "''Plaza España'' as street' " \ ", 'street', 'city', NULL, 'country')" response = self._run_authenticated(query) assert_close_enough(self._x_y_by_cartodb_id(response)[1], - self.fixture_points['Plaza España 1, Barcelona']) + self.fixture_points['Plaza España, Barcelona']) def _test_known_table(self): subquery = 'select * from known_table where cartodb_id < 1100' @@ -325,11 +342,12 @@ class TestBulkStreetFunctions(TestStreetFunctionsSetUp): "FROM cdb_dataservices_client.cdb_bulk_geocode_street_point(" \ "'select 1 as cartodb_id, ''Spain'' as country, " \ "''Barcelona'' as city, " \ - "''Plaza España 1'' as street' " \ + "''Plaza España'' as street' " \ ", 'street', 'city', NULL, 'country')" response = self._run_authenticated(query) - assert_true(isclose(response['rows'][0]['metadata']['relevance'], 1)) + assert_true(isclose(response['rows'][0]['metadata']['relevance'], + self.relevances['Plaza España, Barcelona'])) def _run_authenticated(self, query): authenticated_query = "{}&api_key={}".format(query,