From bbbf70f3ace5a121a7e8669e5f716c8d0014da1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Ignacio=20S=C3=A1nchez=20Lara?= Date: Fri, 22 Jun 2018 12:19:51 +0200 Subject: [PATCH] Street level columns for country, city and state --- client/cdb_dataservices_client--0.25.0.sql | 13 ++++++++----- client/sql/21_bulk_geocoding_functions.sql | 13 ++++++++----- .../cdb_dataservices_server--0.32.0.sql | 8 ++++---- .../extension/sql/21_bulk_geocode_street.sql | 8 ++++---- .../cartodb_services/google/geocoder.py | 19 ++++++++++++------- 5 files changed, 36 insertions(+), 25 deletions(-) diff --git a/client/cdb_dataservices_client--0.25.0.sql b/client/cdb_dataservices_client--0.25.0.sql index 6d87658..56f8826 100644 --- a/client/cdb_dataservices_client--0.25.0.sql +++ b/client/cdb_dataservices_client--0.25.0.sql @@ -1986,7 +1986,8 @@ CREATE OR REPLACE FUNCTION cdb_dataservices_client._DST_DisconnectUserTable( CONNECT cdb_dataservices_client._server_conn_str(); TARGET cdb_dataservices_server._DST_DisconnectUserTable; $$ LANGUAGE plproxy VOLATILE PARALLEL UNSAFE; -CREATE OR REPLACE FUNCTION cdb_dataservices_client.cdb_bulk_geocode_street_point (query text, searchtext text) +CREATE OR REPLACE FUNCTION cdb_dataservices_client.cdb_bulk_geocode_street_point (query text, + country_column text, state_column text, city_column text, street_column text) RETURNS SETOF cdb_dataservices_client.geocoding AS $$ DECLARE query_row_count integer; @@ -1999,8 +2000,8 @@ DECLARE BEGIN EXECUTE format('SELECT COUNT(1) from (%s) _x', query) INTO query_row_count; - RAISE DEBUG 'cdb_bulk_geocode_street_point --> query_row_count: %; query: %; searchtext: %', - query_row_count, query, searchtext; + RAISE DEBUG 'cdb_bulk_geocode_street_point --> query_row_count: %; query: %; country: %; state: %; city: %; street: %', + query_row_count, query, country_column, state_column, city_column, street_column; SELECT cdb_dataservices_client.cdb_enough_quota('hires_geocoder', query_row_count) INTO enough_quota; IF enough_quota IS NOT NULL AND enough_quota THEN RAISE EXCEPTION 'Remaining quota: %. Estimated cost: %', remaining_quota, query_row_count; @@ -2018,12 +2019,14 @@ BEGIN EXECUTE format( 'WITH geocoding_data as (' || - ' SELECT json_build_object(''id'', cartodb_id, ''address'', %s) as data , floor((cartodb_id-1)::float/$1) as batch' || + ' SELECT ' || + ' json_build_object(''id'', cartodb_id, ''address'', %s, ''city'', %s, ''state'', %s, ''country'', %s) as data , ' || + ' floor((cartodb_id-1)::float/$1) as batch' || ' FROM (%s) _x' || ')' || 'INSERT INTO bulk_geocode_street_point SELECT (cdb_dataservices_client._cdb_bulk_geocode_street_point(jsonb_agg(data))).* ' || 'FROM geocoding_data ' || - 'WHERE batch = $2', searchtext, query) + 'WHERE batch = $2', street_column, city_column, state_column, country_column, query) USING BATCHES_SIZE, cartodb_id_batch; GET DIAGNOSTICS current_row_count = ROW_COUNT; diff --git a/client/sql/21_bulk_geocoding_functions.sql b/client/sql/21_bulk_geocoding_functions.sql index 093cb60..c103100 100644 --- a/client/sql/21_bulk_geocoding_functions.sql +++ b/client/sql/21_bulk_geocoding_functions.sql @@ -1,4 +1,5 @@ -CREATE OR REPLACE FUNCTION cdb_dataservices_client.cdb_bulk_geocode_street_point (query text, searchtext text) +CREATE OR REPLACE FUNCTION cdb_dataservices_client.cdb_bulk_geocode_street_point (query text, + country_column text, state_column text, city_column text, street_column text) RETURNS SETOF cdb_dataservices_client.geocoding AS $$ DECLARE query_row_count integer; @@ -11,8 +12,8 @@ DECLARE BEGIN EXECUTE format('SELECT COUNT(1) from (%s) _x', query) INTO query_row_count; - RAISE DEBUG 'cdb_bulk_geocode_street_point --> query_row_count: %; query: %; searchtext: %', - query_row_count, query, searchtext; + RAISE DEBUG 'cdb_bulk_geocode_street_point --> query_row_count: %; query: %; country: %; state: %; city: %; street: %', + query_row_count, query, country_column, state_column, city_column, street_column; SELECT cdb_dataservices_client.cdb_enough_quota('hires_geocoder', query_row_count) INTO enough_quota; IF enough_quota IS NOT NULL AND enough_quota THEN RAISE EXCEPTION 'Remaining quota: %. Estimated cost: %', remaining_quota, query_row_count; @@ -30,12 +31,14 @@ BEGIN EXECUTE format( 'WITH geocoding_data as (' || - ' SELECT json_build_object(''id'', cartodb_id, ''address'', %s) as data , floor((cartodb_id-1)::float/$1) as batch' || + ' SELECT ' || + ' json_build_object(''id'', cartodb_id, ''address'', %s, ''city'', %s, ''state'', %s, ''country'', %s) as data , ' || + ' floor((cartodb_id-1)::float/$1) as batch' || ' FROM (%s) _x' || ')' || 'INSERT INTO bulk_geocode_street_point SELECT (cdb_dataservices_client._cdb_bulk_geocode_street_point(jsonb_agg(data))).* ' || 'FROM geocoding_data ' || - 'WHERE batch = $2', searchtext, query) + 'WHERE batch = $2', street_column, city_column, state_column, country_column, query) USING BATCHES_SIZE, cartodb_id_batch; GET DIAGNOSTICS current_row_count = ROW_COUNT; diff --git a/server/extension/cdb_dataservices_server--0.32.0.sql b/server/extension/cdb_dataservices_server--0.32.0.sql index 59239e9..775c904 100644 --- a/server/extension/cdb_dataservices_server--0.32.0.sql +++ b/server/extension/cdb_dataservices_server--0.32.0.sql @@ -2375,7 +2375,7 @@ RETURNS SETOF cdb_dataservices_server.geocoding AS $$ $$ LANGUAGE plpythonu STABLE PARALLEL RESTRICTED; -CREATE OR REPLACE FUNCTION cdb_dataservices_server._cdb_bulk_google_geocode_street_point(username TEXT, orgname TEXT, searchtext jsonb) +CREATE OR REPLACE FUNCTION cdb_dataservices_server._cdb_bulk_google_geocode_street_point(username TEXT, orgname TEXT, searches jsonb) RETURNS SETOF cdb_dataservices_server.geocoding AS $$ from cartodb_services.tools import LegacyServiceManager,QuotaExceededException,Logger from cartodb_services.google import GoogleMapsGeocoder @@ -2389,7 +2389,7 @@ RETURNS SETOF cdb_dataservices_server.geocoding AS $$ try: service_manager.assert_within_limits(quota=False) geocoder = GoogleMapsGeocoder(service_manager.config.google_client_id, service_manager.config.google_api_key, service_manager.logger) - geocode_results = geocoder.bulk_geocode(searchtext=searchtext) + geocode_results = geocoder.bulk_geocode(searches=searches) if geocode_results: results = [] for result in geocode_results: @@ -2402,10 +2402,10 @@ RETURNS SETOF cdb_dataservices_server.geocoding AS $$ service_manager.quota_service.increment_success_service_use(len(results)) return results else: - service_manager.quota_service.increment_empty_service_use(len(searchtext)) + service_manager.quota_service.increment_empty_service_use(len(searches)) return [] except QuotaExceededException as qe: - service_manager.quota_service.increment_failed_service_use(len(searchtext)) + service_manager.quota_service.increment_failed_service_use(len(searches)) return [] except BaseException as e: import sys diff --git a/server/extension/sql/21_bulk_geocode_street.sql b/server/extension/sql/21_bulk_geocode_street.sql index 377b825..aebb6b8 100644 --- a/server/extension/sql/21_bulk_geocode_street.sql +++ b/server/extension/sql/21_bulk_geocode_street.sql @@ -32,7 +32,7 @@ RETURNS SETOF cdb_dataservices_server.geocoding AS $$ $$ LANGUAGE plpythonu STABLE PARALLEL RESTRICTED; -CREATE OR REPLACE FUNCTION cdb_dataservices_server._cdb_bulk_google_geocode_street_point(username TEXT, orgname TEXT, searchtext jsonb) +CREATE OR REPLACE FUNCTION cdb_dataservices_server._cdb_bulk_google_geocode_street_point(username TEXT, orgname TEXT, searches jsonb) RETURNS SETOF cdb_dataservices_server.geocoding AS $$ from cartodb_services.tools import LegacyServiceManager,QuotaExceededException,Logger from cartodb_services.google import GoogleMapsGeocoder @@ -46,7 +46,7 @@ RETURNS SETOF cdb_dataservices_server.geocoding AS $$ try: service_manager.assert_within_limits(quota=False) geocoder = GoogleMapsGeocoder(service_manager.config.google_client_id, service_manager.config.google_api_key, service_manager.logger) - geocode_results = geocoder.bulk_geocode(searchtext=searchtext) + geocode_results = geocoder.bulk_geocode(searches=searches) if geocode_results: results = [] for result in geocode_results: @@ -59,10 +59,10 @@ RETURNS SETOF cdb_dataservices_server.geocoding AS $$ service_manager.quota_service.increment_success_service_use(len(results)) return results else: - service_manager.quota_service.increment_empty_service_use(len(searchtext)) + service_manager.quota_service.increment_empty_service_use(len(searches)) return [] except QuotaExceededException as qe: - service_manager.quota_service.increment_failed_service_use(len(searchtext)) + service_manager.quota_service.increment_failed_service_use(len(searches)) return [] except BaseException as e: import sys diff --git a/server/lib/python/cartodb_services/cartodb_services/google/geocoder.py b/server/lib/python/cartodb_services/cartodb_services/google/geocoder.py index 3dff728..ba0a663 100644 --- a/server/lib/python/cartodb_services/cartodb_services/google/geocoder.py +++ b/server/lib/python/cartodb_services/cartodb_services/google/geocoder.py @@ -14,16 +14,18 @@ import json import time, random -def async_geocoder(geocoder, address): +def async_geocoder(geocoder, address, components): + # TODO: clean this and previous import # time.sleep(.3 + random.random()) # return [{ 'geometry': { 'location': { 'lng': 1, 'lat': 2 } } }] - results = geocoder.geocode(address=address) + results = geocoder.geocode(address=address, components=components) return results if results else [] class GoogleMapsGeocoder: """A Google Maps Geocoder wrapper for python""" PARALLEL_PROCESSES = 13 + SEARCH_KEYS = ['id', 'address', 'city', 'state', 'country'] def __init__(self, client_id, client_secret, logger): if client_id is None: @@ -46,20 +48,23 @@ class GoogleMapsGeocoder: except KeyError: raise MalformedResult() - def bulk_geocode(self, searchtext): + def bulk_geocode(self, searches): try: - decoded_searchtext = json.loads(searchtext) + decoded_searches = json.loads(searches) except Exception as e: self._logger.error('General error', exception=e) raise e bulk_results = {} pool = Pool(processes=self.PARALLEL_PROCESSES) - for search in decoded_searchtext: - search_id, address = [search[k] for k in ['id', 'address']] + for search in decoded_searches: + search_id, address, city, state, country = \ + [search.get(k, None) for k in self.SEARCH_KEYS] + opt_params = self._build_optional_parameters(city, state, country) if address: + self._logger.debug('async geocoding --> {} {}'.format(address.encode('utf-8'), opt_params)) result = pool.apply_async(async_geocoder, - (self.geocoder, address)) + (self.geocoder, address, opt_params)) else: result = [] bulk_results[search_id] = result