Relevance metadata for Mapbox

This commit is contained in:
Juan Ignacio Sánchez Lara 2018-07-11 07:42:21 +02:00
parent 2af9204542
commit 825e3b7ee8
3 changed files with 68 additions and 37 deletions

View File

@@ -29,15 +29,10 @@ class MapboxBulkGeocoder(MapboxGeocoder, StreetPointBulkGeocoder):
for search in searches: for search in searches:
elements = self._encoded_elements(search) elements = self._encoded_elements(search)
self._logger.debug('--> Sending serial search: {}'.format(search)) self._logger.debug('--> Sending serial search: {}'.format(search))
coordinates = self._geocode_search(*elements) result = self.geocode_meta(*elements)
results.append((search[0], coordinates, []))
return results
def _geocode_search(self, address, city, state, country): results.append((search[0], result[0], result[1]))
coordinates = self.geocode(searchtext=address, city=city, return results
state_province=state, country=country)
self._logger.debug('--> result sent')
return coordinates
def _encoded_elements(self, search): def _encoded_elements(self, search):
(search_id, address, city, state, country) = search (search_id, address, city, state, country) = search
@@ -58,11 +53,11 @@ class MapboxBulkGeocoder(MapboxGeocoder, StreetPointBulkGeocoder):
frees.append(free) frees.append(free)
self._logger.debug('--> sending free search: {}'.format(frees)) self._logger.debug('--> sending free search: {}'.format(frees))
xy_results = self.geocode_free_text(frees) full_results = self.geocode_free_text_meta(frees)
results = [] results = []
self._logger.debug('--> searches: {}; xy: {}'.format(searches, xy_results)) self._logger.debug('--> searches: {}; xy: {}'.format(searches, full_results))
for s, r in zip(searches, xy_results): for s, r in zip(searches, full_results):
results.append((s[0], r, [])) results.append((s[0], r[0], r[1]))
self._logger.debug('--> results: {}'.format(results)) self._logger.debug('--> results: {}'.format(results))
return results return results

View File

@@ -22,6 +22,8 @@ ENTRY_COORDINATES = 'coordinates'
ENTRY_TYPE = 'type' ENTRY_TYPE = 'type'
TYPE_POINT = 'Point' TYPE_POINT = 'Point'
EMPTY_RESPONSE = [[], {}]
class MapboxGeocoder(Traceable): class MapboxGeocoder(Traceable):
''' '''
@@ -49,12 +51,16 @@ class MapboxGeocoder(Traceable):
for a_json_response in json_response: for a_json_response in json_response:
if a_json_response[ENTRY_FEATURES]: if a_json_response[ENTRY_FEATURES]:
feature = a_json_response[ENTRY_FEATURES][0] feature = a_json_response[ENTRY_FEATURES][0]
result.append(self._extract_lng_lat_from_feature(feature)) result.append([
self._extract_lng_lat_from_feature(feature),
self._extract_metadata_from_result(feature)
]
)
else: else:
result.append([]) result.append(EMPTY_RESPONSE)
return result return result
else: else:
return [] return EMPTY_RESPONSE
def _extract_lng_lat_from_feature(self, feature): def _extract_lng_lat_from_feature(self, feature):
geometry = feature[ENTRY_GEOMETRY] geometry = feature[ENTRY_GEOMETRY]
@@ -67,6 +73,14 @@ class MapboxGeocoder(Traceable):
latitude = location[1] latitude = location[1]
return [longitude, latitude] return [longitude, latitude]
def _extract_metadata_from_result(self, result):
return {
'relevance': self._normalize_relevance(float(result['relevance']))
}
def _normalize_relevance(self, relevance):
return 1 if relevance == 0.99 else relevance
def _validate_input(self, searchtext, city=None, state_province=None, def _validate_input(self, searchtext, city=None, state_province=None,
country=None): country=None):
if searchtext and searchtext.strip(): if searchtext and searchtext.strip():
@@ -88,8 +102,13 @@ class MapboxGeocoder(Traceable):
:param country: Country ISO 3166 code :param country: Country ISO 3166 code
:return: [x, y] on success, [] on error :return: [x, y] on success, [] on error
""" """
return self.geocode_meta(searchtext, city, state_province, country)[0]
@qps_retry(qps=10)
def geocode_meta(self, searchtext, city=None, state_province=None,
country=None):
if not self._validate_input(searchtext, city, state_province, country): if not self._validate_input(searchtext, city, state_province, country):
return [] return EMPTY_RESPONSE
address = [] address = []
if searchtext and searchtext.strip(): if searchtext and searchtext.strip():
@@ -99,32 +118,31 @@ class MapboxGeocoder(Traceable):
if state_province: if state_province:
address.append(normalize(state_province)) address.append(normalize(state_province))
country = [country] if country else None
free_search = ', '.join(address) free_search = ', '.join(address)
return self.geocode_free_text([free_search], country)[0] return self.geocode_free_text_meta([free_search], country)[0]
@qps_retry(qps=10) @qps_retry(qps=10)
def geocode_free_text(self, free_searches, country=None): def geocode_free_text_meta(self, free_searches, country=None):
""" """
:param free_searches: Free text searches :param free_searches: Free text searches
:param country: Country ISO 3166 code :param country: Country ISO 3166 code
:return: list of [x, y] on success, [] on error :return: list of [x, y] on success, [] on error
""" """
country = [country] if country else None
try: try:
free_search = ';'.join([self._escape(fs) for fs in free_searches]) free_search = ';'.join([self._escape(fs) for fs in free_searches])
self._logger.debug('--> free search: {}'.format(free_search)) self._logger.debug('--> free search: {}, country: {}'.format(free_search, country))
response = self._geocoder.forward(address=free_search.decode('utf-8'), response = self._geocoder.forward(address=free_search.decode('utf-8'),
country=country, country=country)
limit=1)
if response.status_code == requests.codes.ok: if response.status_code == requests.codes.ok:
return self._parse_geocoder_response(response.text) return self._parse_geocoder_response(response.text)
elif response.status_code == requests.codes.bad_request: elif response.status_code == requests.codes.bad_request:
return [] return EMPTY_RESPONSE
elif response.status_code == requests.codes.unprocessable_entity: elif response.status_code == requests.codes.unprocessable_entity:
return [] return EMPTY_RESPONSE
else: else:
raise ServiceException(response.status_code, response) raise ServiceException(response.status_code, response)
except requests.Timeout as te: except requests.Timeout as te:
@@ -138,7 +156,7 @@ class MapboxGeocoder(Traceable):
# Don't raise the exception to continue with the geocoding job # Don't raise the exception to continue with the geocoding job
self._logger.error('Error connecting to Mapbox geocoding server', self._logger.error('Error connecting to Mapbox geocoding server',
exception=ce) exception=ce)
return [] return EMPTY_RESPONSE
def _escape(self, free_search): def _escape(self, free_search):
# Semicolon is used to separate batch geocoding; there's no documented # Semicolon is used to separate batch geocoding; there's no documented

View File

@@ -22,7 +22,7 @@ class TestStreetFunctionsSetUp(TestCase):
'Madrid': [-3.7037902, 40.4167754], 'Madrid': [-3.7037902, 40.4167754],
'Logroño, Spain': [-2.4449852, 42.4627195], 'Logroño, Spain': [-2.4449852, 42.4627195],
'Logroño, Argentina': [-61.6961807, -29.5031057], 'Logroño, Argentina': [-61.6961807, -29.5031057],
'Plaza España 1, Barcelona': [2.1482563, 41.375485] 'Plaza España, Barcelona': [2.1482563, 41.375485]
} }
HERE_POINTS = { HERE_POINTS = {
@@ -37,7 +37,7 @@ class TestStreetFunctionsSetUp(TestCase):
'Madrid': [-3.70578, 40.42028], 'Madrid': [-3.70578, 40.42028],
'Logroño, Spain': [-2.45194, 42.46592], 'Logroño, Spain': [-2.45194, 42.46592],
'Logroño, Argentina': [-61.69604, -29.50425], 'Logroño, Argentina': [-61.69604, -29.50425],
'Plaza España 1, Barcelona': [2.1735699, 41.3823] # TODO: not ideal 'Plaza España, Barcelona': [2.1735699, 41.3823] # TODO: not ideal
} }
TOMTOM_POINTS = HERE_POINTS.copy() TOMTOM_POINTS = HERE_POINTS.copy()
@@ -48,7 +48,7 @@ class TestStreetFunctionsSetUp(TestCase):
'Valladolid, Spain': [-4.72838, 41.6542], 'Valladolid, Spain': [-4.72838, 41.6542],
'Madrid': [-3.70035, 40.42028], 'Madrid': [-3.70035, 40.42028],
'Logroño, Spain': [-2.44998, 42.46592], 'Logroño, Spain': [-2.44998, 42.46592],
'Plaza España 1, Barcelona': [2.07479, 41.36818] # TODO: not ideal 'Plaza España, Barcelona': [2.07479, 41.36818] # TODO: not ideal
}) })
MAPBOX_POINTS = GOOGLE_POINTS.copy() MAPBOX_POINTS = GOOGLE_POINTS.copy()
@@ -59,7 +59,7 @@ class TestStreetFunctionsSetUp(TestCase):
'Valladolid, Spain': [-4.72856, 41.652251], 'Valladolid, Spain': [-4.72856, 41.652251],
'1902 amphitheatre parkway': [-118.03, 34.06], # TODO: huge mismatch '1902 amphitheatre parkway': [-118.03, 34.06], # TODO: huge mismatch
'Madrid': [-3.69194, 40.4167754], 'Madrid': [-3.69194, 40.4167754],
'Plaza España 1, Barcelona': [2.245969, 41.452483] # TODO: not ideal 'Plaza España, Barcelona': [2.245969, 41.452483] # TODO: not ideal
}) })
FIXTURE_POINTS = { FIXTURE_POINTS = {
@@ -69,6 +69,21 @@ class TestStreetFunctionsSetUp(TestCase):
'mapbox': MAPBOX_POINTS 'mapbox': MAPBOX_POINTS
} }
HERE_RELEVANCES = {
'Plaza España, Barcelona': 1
}
MAPBOX_RELEVANCES = {
'Plaza España, Barcelona': 0.75
}
RELEVANCES = {
'here': HERE_RELEVANCES,
'tomtom': HERE_RELEVANCES,
'mapbox': MAPBOX_RELEVANCES,
'google': HERE_RELEVANCES
}
def setUp(self): def setUp(self):
self.env_variables = IntegrationTestHelper.get_environment_variables() self.env_variables = IntegrationTestHelper.get_environment_variables()
self.sql_api_url = "{0}://{1}.{2}/api/v1/sql".format( self.sql_api_url = "{0}://{1}.{2}/api/v1/sql".format(
@@ -86,6 +101,8 @@ class TestStreetFunctionsSetUp(TestCase):
provider = response['rows'][0]['provider'] provider = response['rows'][0]['provider']
self.fixture_points = self.FIXTURE_POINTS[provider] self.fixture_points = self.FIXTURE_POINTS[provider]
self.relevances = self.RELEVANCES[provider]
def _run_authenticated(self, query): def _run_authenticated(self, query):
authenticated_query = "{}&api_key={}".format(query, authenticated_query = "{}&api_key={}".format(query,
@@ -118,12 +135,12 @@ class TestStreetFunctions(TestStreetFunctionsSetUp):
def test_component_aggregation(self): def test_component_aggregation(self):
query = "select st_x(the_geom), st_y(the_geom) from (" \ query = "select st_x(the_geom), st_y(the_geom) from (" \
"select cdb_dataservices_client.cdb_geocode_street_point( " \ "select cdb_dataservices_client.cdb_geocode_street_point( " \
"'Plaza España 1', 'Barcelona', null, 'Spain') as the_geom) _x" "'Plaza España', 'Barcelona', null, 'Spain') as the_geom) _x"
response = self._run_authenticated(query) response = self._run_authenticated(query)
row = response['rows'][0] row = response['rows'][0]
x_y = [row['st_x'], row['st_y']] x_y = [row['st_x'], row['st_y']]
# Wrong coordinates (Plaza España, Madrid): [-3.7138975, 40.4256762] # Wrong coordinates (Plaza España, Madrid): [-3.7138975, 40.4256762]
assert_close_enough(x_y, self.fixture_points['Plaza España 1, Barcelona']) assert_close_enough(x_y, self.fixture_points['Plaza España, Barcelona'])
class TestBulkStreetFunctions(TestStreetFunctionsSetUp): class TestBulkStreetFunctions(TestStreetFunctionsSetUp):
@@ -294,18 +311,18 @@ class TestBulkStreetFunctions(TestStreetFunctionsSetUp):
x_y_by_cartodb_id = self._x_y_by_cartodb_id(response) x_y_by_cartodb_id = self._x_y_by_cartodb_id(response)
assert_equal(x_y_by_cartodb_id[1], x_y_by_cartodb_id[2]) assert_equal(x_y_by_cartodb_id[1], x_y_by_cartodb_id[2])
# "'Plaza España 1', 'Barcelona', null, 'Spain') as the_geom) _x" # "'Plaza España', 'Barcelona', null, 'Spain') as the_geom) _x"
def test_component_aggregation(self): def test_component_aggregation(self):
query = "select cartodb_id, st_x(the_geom), st_y(the_geom) " \ query = "select cartodb_id, st_x(the_geom), st_y(the_geom) " \
"FROM cdb_dataservices_client.cdb_bulk_geocode_street_point(" \ "FROM cdb_dataservices_client.cdb_bulk_geocode_street_point(" \
"'select 1 as cartodb_id, ''Spain'' as country, " \ "'select 1 as cartodb_id, ''Spain'' as country, " \
"''Barcelona'' as city, " \ "''Barcelona'' as city, " \
"''Plaza España 1'' as street' " \ "''Plaza España'' as street' " \
", 'street', 'city', NULL, 'country')" ", 'street', 'city', NULL, 'country')"
response = self._run_authenticated(query) response = self._run_authenticated(query)
assert_close_enough(self._x_y_by_cartodb_id(response)[1], assert_close_enough(self._x_y_by_cartodb_id(response)[1],
self.fixture_points['Plaza España 1, Barcelona']) self.fixture_points['Plaza España, Barcelona'])
def _test_known_table(self): def _test_known_table(self):
subquery = 'select * from known_table where cartodb_id < 1100' subquery = 'select * from known_table where cartodb_id < 1100'
@@ -325,11 +342,12 @@ class TestBulkStreetFunctions(TestStreetFunctionsSetUp):
"FROM cdb_dataservices_client.cdb_bulk_geocode_street_point(" \ "FROM cdb_dataservices_client.cdb_bulk_geocode_street_point(" \
"'select 1 as cartodb_id, ''Spain'' as country, " \ "'select 1 as cartodb_id, ''Spain'' as country, " \
"''Barcelona'' as city, " \ "''Barcelona'' as city, " \
"''Plaza España 1'' as street' " \ "''Plaza España'' as street' " \
", 'street', 'city', NULL, 'country')" ", 'street', 'city', NULL, 'country')"
response = self._run_authenticated(query) response = self._run_authenticated(query)
assert_true(isclose(response['rows'][0]['metadata']['relevance'], 1)) assert_true(isclose(response['rows'][0]['metadata']['relevance'],
self.relevances['Plaza España, Barcelona']))
def _run_authenticated(self, query): def _run_authenticated(self, query):
authenticated_query = "{}&api_key={}".format(query, authenticated_query = "{}&api_key={}".format(query,