Relevance metadata for Google

This commit is contained in:
Juan Ignacio Sánchez Lara 2018-07-11 11:43:54 +02:00
parent 0b2ee85c11
commit d46d51c3bb
3 changed files with 56 additions and 28 deletions

View File

@ -6,8 +6,7 @@ from cartodb_services.google import GoogleMapsGeocoder
def async_geocoder(geocoder, address, components):
results = geocoder.geocode(address=address, components=components)
return results if results else []
return geocoder.geocode(address=address, components=components)
class GoogleMapsBulkGeocoder(GoogleMapsGeocoder, StreetPointBulkGeocoder):
@ -26,12 +25,9 @@ class GoogleMapsBulkGeocoder(GoogleMapsGeocoder, StreetPointBulkGeocoder):
results = []
for search in searches:
(cartodb_id, street, city, state, country) = search
address = compose_address(street, city, state, country)
components = self._build_optional_parameters(city, state, country)
result = self.geocoder.geocode(address=address, components=components)
lng_lat = self._extract_lng_lat_from_result(result[0]) if result else []
self._logger.debug('--> lng_lat: {}'.format(lng_lat))
results.append((cartodb_id, lng_lat, []))
lng_lat, metadata = self.geocode_meta(street, city, state, country)
self._logger.debug('--> lng_lat: {}. metadata: {}'.format(lng_lat, metadata))
results.append((cartodb_id, lng_lat, metadata))
return results
def _batch_geocode(self, searches):
@ -39,16 +35,13 @@ class GoogleMapsBulkGeocoder(GoogleMapsGeocoder, StreetPointBulkGeocoder):
pool = Pool(processes=self.PARALLEL_PROCESSES)
for search in searches:
(cartodb_id, street, city, state, country) = search
components = self._build_optional_parameters(city, state, country)
# Geocoding works better if components are also inside the address
self._logger.debug('async geocoding --> {}'.format(search))
address = compose_address(street, city, state, country)
if address:
self._logger.debug('async geocoding --> {} {}'.format(address.encode('utf-8'), components))
components = self._build_optional_parameters(city, state, country)
result = pool.apply_async(async_geocoder,
(self.geocoder, address, components))
else:
result = []
bulk_results[cartodb_id] = result
bulk_results[cartodb_id] = result
pool.close()
pool.join()
@ -56,13 +49,12 @@ class GoogleMapsBulkGeocoder(GoogleMapsGeocoder, StreetPointBulkGeocoder):
results = []
for cartodb_id, bulk_result in bulk_results.items():
try:
result = bulk_result.get()
lng_lat, metadata = self._process_results(bulk_result.get())
except Exception as e:
self._logger.error('Error at Google async_geocoder', e)
result = []
lng_lat, metadata = [[], {}]
lng_lat = self._extract_lng_lat_from_result(result[0]) if result else []
results.append((cartodb_id, lng_lat, []))
results.append((cartodb_id, lng_lat, metadata))
return results
except KeyError as e:
self._logger.error('KeyError error', exception=e)
@ -70,4 +62,3 @@ class GoogleMapsBulkGeocoder(GoogleMapsGeocoder, StreetPointBulkGeocoder):
except Exception as e:
self._logger.error('General error', exception=e)
raise e

View File

@ -8,6 +8,15 @@ from cartodb_services.geocoder import compose_address
from cartodb_services.google.exceptions import InvalidGoogleCredentials
from client_factory import GoogleMapsClientFactory
# Value handed back when the geocoder finds nothing: an empty coordinate
# list followed by an empty metadata dict.
EMPTY_RESPONSE = [[], {}]

# Multiplier applied to the base relevance of a result that is flagged as a
# partial match.
PARTIAL_FACTOR = 0.8

# Base relevance for each value of a result's geometry 'location_type'.
RELEVANCE_BY_LOCATION_TYPE = dict(
    ROOFTOP=1,
    GEOMETRIC_CENTER=0.9,
    RANGE_INTERPOLATED=0.8,
    APPROXIMATE=0.7,
)
class GoogleMapsGeocoder():
@ -19,26 +28,49 @@ class GoogleMapsGeocoder():
self.geocoder = GoogleMapsClientFactory.get(self.client_id, self.client_secret, self.channel)
self._logger = logger
def geocode(self, searchtext, city=None, state=None,
country=None):
def geocode(self, searchtext, city=None, state=None, country=None):
return self.geocode_meta(searchtext, city, state, country)[0]
def geocode_meta(self, searchtext, city=None, state=None, country=None):
try:
address = compose_address(searchtext, city, state, country)
opt_params = self._build_optional_parameters(city, state, country)
results = self.geocoder.geocode(address=address,
components=opt_params)
if results:
return self._extract_lng_lat_from_result(results[0])
else:
return []
except KeyError:
return self._process_results(results)
except KeyError as e:
self._logger.error('params: {}, {}, {}, {}'.format(
searchtext.encode('utf-8'), city.encode('utf-8'),
state.encode('utf-8'), country.encode('utf-8')
), e)
raise MalformedResult()
def _process_results(self, results):
if results:
self._logger.debug('--> results: {}'.format(results[0]))
return [
self._extract_lng_lat_from_result(results[0]),
self._extract_metadata_from_result(results[0])
]
else:
return EMPTY_RESPONSE
def _extract_lng_lat_from_result(self, result):
location = result['geometry']['location']
longitude = location['lng']
latitude = location['lat']
return [longitude, latitude]
def _extract_metadata_from_result(self, result):
    """Build the metadata dict (currently only ``'relevance'``) for a result.

    The base relevance is looked up in RELEVANCE_BY_LOCATION_TYPE using
    ``result['geometry']['location_type']``; an unknown or missing type
    raises ``KeyError``. When the result carries a truthy
    ``'partial_match'`` entry, the base value is scaled by PARTIAL_FACTOR.
    """
    relevance = RELEVANCE_BY_LOCATION_TYPE[result['geometry']['location_type']]
    if result.get('partial_match', False):
        # Partial matches are penalised relative to exact ones.
        relevance = relevance * PARTIAL_FACTOR
    return {'relevance': relevance}
def _build_optional_parameters(self, city=None, state=None,
country=None):
optional_params = {}

View File

@ -83,8 +83,13 @@ class TestStreetFunctionsSetUp(TestCase):
'Plaza España, Barcelona': 0.85
})
# Expected relevances for Google start from a copy of the HERE table; only
# the entries listed in the update below are overridden.
GOOGLE_RELEVANCES = HERE_RELEVANCES.copy()
GOOGLE_RELEVANCES.update({
'Plaza España, Barcelona': 0.9
})
RELEVANCES = {
'google': HERE_RELEVANCES,
'google': GOOGLE_RELEVANCES,
'here': HERE_RELEVANCES,
'tomtom': TOMTOM_RELEVANCES,
'mapbox': MAPBOX_RELEVANCES
@ -331,7 +336,7 @@ class TestBulkStreetFunctions(TestStreetFunctionsSetUp):
self.fixture_points['Plaza España, Barcelona'])
def _test_known_table(self):
subquery = 'select * from known_table where cartodb_id < 1100'
subquery = 'select * from unknown_table where cartodb_id < 1100'
subquery_count = 'select count(1) from ({}) _x'.format(subquery)
count = self._run_authenticated(subquery_count)['rows'][0]['count']