TomTom bulk geocoding by bypassing to serial
This commit is contained in:
parent
5be43e15c0
commit
31afc82b56
@ -83,3 +83,6 @@ deploy: release_remove_parallel_deploy
|
||||
$(INSTALL_DATA) old_versions/*.sql *.sql '$(DESTDIR)$(datadir)/extension/'
|
||||
|
||||
install: deploy
|
||||
|
||||
reinstall: install
|
||||
psql -U postgres -d dataservices_db -c "drop extension if exists cdb_dataservices_server; create extension cdb_dataservices_server;"
|
||||
|
@ -2367,12 +2367,15 @@ RETURNS SETOF cdb_dataservices_server.geocoding AS $$
|
||||
|
||||
with metrics('cdb_bulk_geocode_street_point', user_geocoder_config, logger, params):
|
||||
if user_geocoder_config.google_geocoder:
|
||||
plan = plpy.prepare("SELECT * FROM cdb_dataservices_server._cdb_bulk_google_geocode_street_point($1, $2, $3); ", ["text", "text", "jsonb"])
|
||||
provider_function = "_cdb_bulk_google_geocode_street_point";
|
||||
elif user_geocoder_config.heremaps_geocoder:
|
||||
plan = plpy.prepare("SELECT * FROM cdb_dataservices_server._cdb_bulk_heremaps_geocode_street_point($1, $2, $3); ", ["text", "text", "jsonb"])
|
||||
provider_function = "_cdb_bulk_heremaps_geocode_street_point";
|
||||
elif user_geocoder_config.tomtom_geocoder:
|
||||
provider_function = "_cdb_bulk_tomtom_geocode_street_point";
|
||||
else:
|
||||
raise Exception('Requested geocoder is not available')
|
||||
|
||||
plan = plpy.prepare("SELECT * FROM cdb_dataservices_server.{}($1, $2, $3); ".format(provider_function), ["text", "text", "jsonb"])
|
||||
result = plpy.execute(plan, [username, orgname, searches])
|
||||
return result
|
||||
|
||||
@ -2400,6 +2403,23 @@ RETURNS SETOF cdb_dataservices_server.geocoding AS $$
|
||||
return run_street_point_geocoder(plpy, GD, geocoder, service_manager, username, orgname, searches)
|
||||
$$ LANGUAGE plpythonu STABLE PARALLEL RESTRICTED;
|
||||
|
||||
-- Bulk street-point geocoding through the TomTom provider.
-- Builds a TomTomBulkGeocoder from the 'geocoder' service configuration of
-- the given username/orgname and hands `searches` unchanged to
-- run_street_point_geocoder, whose result is returned as-is.
CREATE OR REPLACE FUNCTION cdb_dataservices_server._cdb_bulk_tomtom_geocode_street_point(username TEXT, orgname TEXT, searches jsonb)
RETURNS SETOF cdb_dataservices_server.geocoding AS $$
  import cartodb_services
  from cartodb_services import run_street_point_geocoder
  from cartodb_services.refactor.service.tomtom_geocoder_config import TomTomGeocoderConfigBuilder
  from cartodb_services.tomtom import TomTomBulkGeocoder
  from cartodb_services.tools import Logger, ServiceManager

  cartodb_services.init(plpy, GD)

  # Built from the shared logger configuration in GD; the geocoder below is
  # given the service manager's logger, not this one.
  logger = Logger(GD["logger_config"])

  manager = ServiceManager('geocoder', TomTomGeocoderConfigBuilder, username, orgname, GD)
  bulk_geocoder = TomTomBulkGeocoder(manager.config.tomtom_api_key, manager.logger, manager.config.service_params)
  return run_street_point_geocoder(plpy, GD, bulk_geocoder, manager, username, orgname, searches)
$$ LANGUAGE plpythonu STABLE PARALLEL RESTRICTED;
|
||||
|
||||
CREATE OR REPLACE FUNCTION cdb_dataservices_server.cdb_geocode_admin0_polygon(username text, orgname text, country_name text)
|
||||
RETURNS Geometry AS $$
|
||||
from cartodb_services.metrics import QuotaService
|
||||
|
@ -24,12 +24,15 @@ RETURNS SETOF cdb_dataservices_server.geocoding AS $$
|
||||
|
||||
with metrics('cdb_bulk_geocode_street_point', user_geocoder_config, logger, params):
|
||||
if user_geocoder_config.google_geocoder:
|
||||
plan = plpy.prepare("SELECT * FROM cdb_dataservices_server._cdb_bulk_google_geocode_street_point($1, $2, $3); ", ["text", "text", "jsonb"])
|
||||
provider_function = "_cdb_bulk_google_geocode_street_point";
|
||||
elif user_geocoder_config.heremaps_geocoder:
|
||||
plan = plpy.prepare("SELECT * FROM cdb_dataservices_server._cdb_bulk_heremaps_geocode_street_point($1, $2, $3); ", ["text", "text", "jsonb"])
|
||||
provider_function = "_cdb_bulk_heremaps_geocode_street_point";
|
||||
elif user_geocoder_config.tomtom_geocoder:
|
||||
provider_function = "_cdb_bulk_tomtom_geocode_street_point";
|
||||
else:
|
||||
raise Exception('Requested geocoder is not available')
|
||||
|
||||
plan = plpy.prepare("SELECT * FROM cdb_dataservices_server.{}($1, $2, $3); ".format(provider_function), ["text", "text", "jsonb"])
|
||||
result = plpy.execute(plan, [username, orgname, searches])
|
||||
return result
|
||||
|
||||
@ -57,3 +60,20 @@ RETURNS SETOF cdb_dataservices_server.geocoding AS $$
|
||||
return run_street_point_geocoder(plpy, GD, geocoder, service_manager, username, orgname, searches)
|
||||
$$ LANGUAGE plpythonu STABLE PARALLEL RESTRICTED;
|
||||
|
||||
-- Bulk street-point geocoding through the TomTom provider.
-- Builds a TomTomBulkGeocoder from the 'geocoder' service configuration of
-- the given username/orgname and hands `searches` unchanged to
-- run_street_point_geocoder, whose result is returned as-is.
CREATE OR REPLACE FUNCTION cdb_dataservices_server._cdb_bulk_tomtom_geocode_street_point(username TEXT, orgname TEXT, searches jsonb)
RETURNS SETOF cdb_dataservices_server.geocoding AS $$
  import cartodb_services
  from cartodb_services import run_street_point_geocoder
  from cartodb_services.refactor.service.tomtom_geocoder_config import TomTomGeocoderConfigBuilder
  from cartodb_services.tomtom import TomTomBulkGeocoder
  from cartodb_services.tools import Logger, ServiceManager

  cartodb_services.init(plpy, GD)

  # Built from the shared logger configuration in GD; the geocoder below is
  # given the service manager's logger, not this one.
  logger = Logger(GD["logger_config"])

  manager = ServiceManager('geocoder', TomTomGeocoderConfigBuilder, username, orgname, GD)
  bulk_geocoder = TomTomBulkGeocoder(manager.config.tomtom_api_key, manager.logger, manager.config.service_params)
  return run_street_point_geocoder(plpy, GD, bulk_geocoder, manager, username, orgname, searches)
$$ LANGUAGE plpythonu STABLE PARALLEL RESTRICTED;
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
from geocoder import TomTomGeocoder
|
||||
from bulk_geocoder import TomTomBulkGeocoder
|
||||
from routing import TomTomRouting, TomTomRoutingResponse
|
||||
from isolines import TomTomIsolines, TomTomIsochronesResponse
|
||||
|
@ -0,0 +1,36 @@
|
||||
from cartodb_services import StreetPointBulkGeocoder
|
||||
from cartodb_services.tomtom import TomTomGeocoder
|
||||
|
||||
|
||||
class TomTomBulkGeocoder(TomTomGeocoder, StreetPointBulkGeocoder):
    """Bulk street-point geocoder for TomTom.

    Small requests are resolved serially, one ``geocode`` call per search.
    ``geocode`` and ``_logger`` are not defined in this class; presumably
    they come from ``TomTomGeocoder`` -- confirm against that class.
    """

    # TODO: ?
    MAX_BATCH_SIZE = 1000000  # From the docs
    # TODO: ?
    MIN_BATCHED_SEARCH = 100  # Under this, serial will be used

    def _bulk_geocode(self, searches):
        """Geocode ``searches`` and return a list of result tuples.

        Raises ``Exception`` when more than ``MAX_BATCH_SIZE`` searches are
        given. Otherwise dispatches to the batch or the serial strategy
        according to ``_should_use_batch``.
        """
        if len(searches) > self.MAX_BATCH_SIZE:
            raise Exception("Batch size can't be larger than {}".format(self.MAX_BATCH_SIZE))
        if self._should_use_batch(searches):
            self._logger.debug('--> Batch geocode')
            # NOTE(review): _batch_geocode is not defined in this class.
            # Unless a base class provides it, requests with at least
            # MIN_BATCHED_SEARCH searches fail here -- confirm.
            return self._batch_geocode(searches)
        else:
            self._logger.debug('--> Serial geocode')
            return self._serial_geocode(searches)

    def _should_use_batch(self, searches):
        """Return True when there are at least MIN_BATCHED_SEARCH searches."""
        return len(searches) >= self.MIN_BATCHED_SEARCH

    def _serial_geocode(self, searches):
        """Geocode each search with its own ``geocode`` call.

        Each search is a 5-tuple ``(search_id, address, city, state,
        country)``. Returns a list of ``(search_id, coordinates, [])``
        tuples in the same order as ``searches``.
        """
        results = []
        for search in searches:
            (search_id, address, city, state, country) = search
            self._logger.debug('--> Sending serial search: {}'.format(search))
            # Components may be absent; encoding None would raise
            # AttributeError and abort the whole request, so None is passed
            # through untouched.
            # NOTE(review): assumes geocode() accepts None for a missing
            # component -- confirm against TomTomGeocoder.geocode.
            coordinates = self.geocode(searchtext=self._utf8_or_none(address),
                                       city=self._utf8_or_none(city),
                                       state_province=self._utf8_or_none(state),
                                       country=self._utf8_or_none(country))
            self._logger.debug('--> result sent')
            results.append((search_id, coordinates, []))
        return results

    @staticmethod
    def _utf8_or_none(value):
        """Return ``value`` encoded as UTF-8, or None when ``value`` is None."""
        if value is None:
            return None
        return value.encode('utf-8')
|
||||
|
@ -71,9 +71,20 @@ class TestBulkStreetFunctions(TestStreetFunctionsSetUp):
|
||||
'Logroño, Argentina': [-61.69604, -29.50425]
|
||||
}
|
||||
|
||||
TOMTOM_POINTS = HERE_POINTS.copy()
|
||||
TOMTOM_POINTS.update({
|
||||
'Plaza Mayor, Valladolid': [-4.72183, 41.5826],
|
||||
'Paseo Zorrilla, Valladolid': [-4.74031, 41.63181],
|
||||
'Valladolid': [-4.72838, 41.6542],
|
||||
'Valladolid, Spain': [-4.72838, 41.6542],
|
||||
'Madrid': [-3.70035, 40.42028],
|
||||
'Logroño, Spain': [-2.44998, 42.46592],
|
||||
})
|
||||
|
||||
FIXTURE_POINTS = {
|
||||
'google': GOOGLE_POINTS,
|
||||
'heremaps': HERE_POINTS
|
||||
'heremaps': HERE_POINTS,
|
||||
'tomtom': TOMTOM_POINTS
|
||||
}
|
||||
|
||||
def setUp(self):
|
||||
|
Loading…
Reference in New Issue
Block a user