From 71d5ce951a9e145967e9e6571f4444956af02ec0 Mon Sep 17 00:00:00 2001 From: Mario de Frutos Date: Thu, 25 Aug 2016 16:03:04 +0200 Subject: [PATCH] Use mapzen as first option for the namedplace geocoding --- client/renderer/interface.yaml | 1 - ...db_dataservices_server--0.14.1--0.14.2.sql | 0 ...db_dataservices_server--0.14.2--0.14.1.sql | 0 .../cdb_dataservices_server--0.14.2.sql | 0 server/extension/sql/15_config_helper.sql | 4 +- server/extension/sql/50_namedplaces.sql | 98 +++++++++++-------- .../cartodb_services/mapzen/geocoder.py | 9 +- .../cartodb_services/metrics/config.py | 13 ++- server/lib/python/cartodb_services/setup.py | 2 +- 9 files changed, 71 insertions(+), 56 deletions(-) rename server/extension/{ => old_versions}/cdb_dataservices_server--0.14.1--0.14.2.sql (100%) rename server/extension/{ => old_versions}/cdb_dataservices_server--0.14.2--0.14.1.sql (100%) rename server/extension/{ => old_versions}/cdb_dataservices_server--0.14.2.sql (100%) diff --git a/client/renderer/interface.yaml b/client/renderer/interface.yaml index d9eb13a..7c744e3 100644 --- a/client/renderer/interface.yaml +++ b/client/renderer/interface.yaml @@ -33,7 +33,6 @@ - { name: admin1_name, type: text} - { name: country_name, type: text} - - name: cdb_geocode_postalcode_polygon return_type: Geometry params: diff --git a/server/extension/cdb_dataservices_server--0.14.1--0.14.2.sql b/server/extension/old_versions/cdb_dataservices_server--0.14.1--0.14.2.sql similarity index 100% rename from server/extension/cdb_dataservices_server--0.14.1--0.14.2.sql rename to server/extension/old_versions/cdb_dataservices_server--0.14.1--0.14.2.sql diff --git a/server/extension/cdb_dataservices_server--0.14.2--0.14.1.sql b/server/extension/old_versions/cdb_dataservices_server--0.14.2--0.14.1.sql similarity index 100% rename from server/extension/cdb_dataservices_server--0.14.2--0.14.1.sql rename to server/extension/old_versions/cdb_dataservices_server--0.14.2--0.14.1.sql diff --git a/server/extension/cdb_dataservices_server--0.14.2.sql b/server/extension/old_versions/cdb_dataservices_server--0.14.2.sql similarity index 100% rename from server/extension/cdb_dataservices_server--0.14.2.sql rename to server/extension/old_versions/cdb_dataservices_server--0.14.2.sql diff --git a/server/extension/sql/15_config_helper.sql b/server/extension/sql/15_config_helper.sql index 79ca455..5f6fc77 100644 --- a/server/extension/sql/15_config_helper.sql +++ b/server/extension/sql/15_config_helper.sql @@ -10,7 +10,7 @@ RETURNS boolean AS $$ return True $$ LANGUAGE plpythonu SECURITY DEFINER; -CREATE OR REPLACE FUNCTION cdb_dataservices_server._get_geocoder_config(username text, orgname text) +CREATE OR REPLACE FUNCTION cdb_dataservices_server._get_geocoder_config(username text, orgname text, provider text DEFAULT NULL) RETURNS boolean AS $$ cache_key = "user_geocoder_config_{0}".format(username) if cache_key in GD: @@ -19,7 +19,7 @@ RETURNS boolean AS $$ from cartodb_services.metrics import GeocoderConfig plpy.execute("SELECT cdb_dataservices_server._connect_to_redis('{0}')".format(username)) redis_conn = GD["redis_connection_{0}".format(username)]['redis_metadata_connection'] - geocoder_config = GeocoderConfig(redis_conn, plpy, username, orgname) + geocoder_config = GeocoderConfig(redis_conn, plpy, username, orgname, provider) GD[cache_key] = geocoder_config return True $$ LANGUAGE plpythonu SECURITY DEFINER; diff --git a/server/extension/sql/50_namedplaces.sql b/server/extension/sql/50_namedplaces.sql index 43455db..bfea5a1 100644 --- a/server/extension/sql/50_namedplaces.sql +++ b/server/extension/sql/50_namedplaces.sql @@ -1,76 +1,81 @@ ---- cdb_geocode_namedplace_point(city_name text) CREATE OR REPLACE FUNCTION cdb_dataservices_server.cdb_geocode_namedplace_point(username text, orgname text, city_name text) RETURNS Geometry AS $$ - from cartodb_services.metrics import QuotaService - from cartodb_services.metrics import InternalGeocoderConfig - from cartodb_services.tools import Logger,LoggerConfig - - - plpy.execute("SELECT cdb_dataservices_server._connect_to_redis('{0}')".format(username)) - redis_conn = GD["redis_connection_{0}".format(username)]['redis_metrics_connection'] - plpy.execute("SELECT cdb_dataservices_server._get_internal_geocoder_config({0}, {1})".format(plpy.quote_nullable(username), plpy.quote_nullable(orgname))) - user_geocoder_config = GD["user_internal_geocoder_config_{0}".format(username)] - - plpy.execute("SELECT cdb_dataservices_server._get_logger_config()") - logger_config = GD["logger_config"] - logger = Logger(logger_config) - quota_service = QuotaService(user_geocoder_config, redis_conn) try: - plan = plpy.prepare("SELECT cdb_dataservices_server._cdb_geocode_namedplace_point(trim($1)) AS mypoint", ["text"]) - rv = plpy.execute(plan, [city_name], 1) - result = rv[0]["mypoint"] - if result: - quota_service.increment_success_service_use() - return result - else: - quota_service.increment_empty_service_use() - return None + mapzen_plan = plpy.prepare("SELECT cdb_dataservices_server._cdb_mapzen_geocode_namedplace($1, $2, $3) as point;", ["text", "text", "text"]) + return plpy.execute(mapzen_plan, [username, orgname, city_name])[0]['point'] except BaseException as e: - import sys - quota_service.increment_failed_service_use() - logger.error('Error trying to geocode namedplace point', sys.exc_info(), data={"username": username, "orgname": orgname}) - raise Exception('Error trying to geocode namedplace point') - finally: - quota_service.increment_total_service_use() + internal_plan = plpy.prepare("SELECT cdb_dataservices_server._cdb_internal_geocode_namedplace($1, $2, $3) as point;", ["text", "text", "text"]) + return plpy.execute(internal_plan, [username, orgname, city_name])[0]['point'] $$ LANGUAGE plpythonu; ---- cdb_geocode_namedplace_point(city_name text, country_name text) CREATE OR REPLACE FUNCTION cdb_dataservices_server.cdb_geocode_namedplace_point(username text, orgname text, city_name text, country_name text) RETURNS Geometry AS $$ + try: + mapzen_plan = plpy.prepare("SELECT cdb_dataservices_server._cdb_mapzen_geocode_namedplace($1, $2, $3, NULL, $4) as point;", ["text", "text", "text", "text"]) + return plpy.execute(mapzen_plan, [username, orgname, city_name, country_name])[0]['point'] + except BaseException as e: + internal_plan = plpy.prepare("SELECT cdb_dataservices_server._cdb_internal_geocode_namedplace($1, $2, $3, NULL, $4) as point;", ["text", "text", "text", "text"]) + return plpy.execute(internal_plan, [username, orgname, city_name, country_name])[0]['point'] +$$ LANGUAGE plpythonu; + +---- cdb_geocode_namedplace_point(city_name text, admin1_name text, country_name text) +CREATE OR REPLACE FUNCTION cdb_dataservices_server.cdb_geocode_namedplace_point(username text, orgname text, city_name text, admin1_name text, country_name text) +RETURNS Geometry AS $$ + try: + mapzen_plan = plpy.prepare("SELECT cdb_dataservices_server._cdb_mapzen_geocode_namedplace($1, $2, $3, $4, $5) as point;", ["text", "text", "text", "text", "text"]) + return plpy.execute(mapzen_plan, [username, orgname, city_name, admin1_name, country_name])[0]['point'] + except BaseException as e: + internal_plan = plpy.prepare("SELECT cdb_dataservices_server._cdb_internal_geocode_namedplace($1, $2, $3, $4, $5) as point;", ["text", "text", "text", "text", "text"]) + return plpy.execute(internal_plan, [username, orgname, city_name, admin1_name, country_name])[0]['point'] +$$ LANGUAGE plpythonu; + +CREATE OR REPLACE FUNCTION cdb_dataservices_server._cdb_mapzen_geocode_namedplace(username text, orgname text, city_name text, admin1_name text DEFAULT NULL, country_name text DEFAULT NULL) +RETURNS Geometry AS $$ + from cartodb_services.mapzen import MapzenGeocoder + from cartodb_services.mapzen.types import country_to_iso3 from cartodb_services.metrics import QuotaService - from cartodb_services.metrics import InternalGeocoderConfig from cartodb_services.tools import Logger,LoggerConfig plpy.execute("SELECT cdb_dataservices_server._connect_to_redis('{0}')".format(username)) redis_conn = GD["redis_connection_{0}".format(username)]['redis_metrics_connection'] - plpy.execute("SELECT cdb_dataservices_server._get_internal_geocoder_config({0}, {1})".format(plpy.quote_nullable(username), plpy.quote_nullable(orgname))) - user_geocoder_config = GD["user_internal_geocoder_config_{0}".format(username)] + plpy.execute("SELECT cdb_dataservices_server._get_geocoder_config({0}, {1}, {2})".format(plpy.quote_nullable(username), plpy.quote_nullable(orgname), plpy.quote_nullable('mapzen'))) + user_geocoder_config = GD["user_geocoder_config_{0}".format(username)] plpy.execute("SELECT cdb_dataservices_server._get_logger_config()") logger_config = GD["logger_config"] logger = Logger(logger_config) quota_service = QuotaService(user_geocoder_config, redis_conn) + if not quota_service.check_user_quota(): + raise Exception('You have reached the limit of your quota') + try: - plan = plpy.prepare("SELECT cdb_dataservices_server._cdb_geocode_namedplace_point(trim($1), trim($2)) AS mypoint", ["text", "text"]) - rv = plpy.execute(plan, [city_name, country_name], 1) - result = rv[0]["mypoint"] - if result: + geocoder = MapzenGeocoder(user_geocoder_config.mapzen_api_key, logger) + country_iso3 = None + if country_name: + country_iso3 = country_to_iso3(country_name) + coordinates = geocoder.geocode(searchtext=city_name, city=None, + state_province=admin1_name, + country=country_iso3, search_type='locality') + if coordinates: quota_service.increment_success_service_use() - return result + plan = plpy.prepare("SELECT ST_SetSRID(ST_MakePoint($1, $2), 4326); ", ["double precision", "double precision"]) + point = plpy.execute(plan, [coordinates[0], coordinates[1]], 1)[0] + return point['st_setsrid'] else: quota_service.increment_empty_service_use() return None except BaseException as e: import sys quota_service.increment_failed_service_use() - logger.error('Error trying to geocode namedplace point', sys.exc_info(), data={"username": username, "orgname": orgname}) - raise Exception('Error trying to geocode namedplace point') + logger.error('Error trying to geocode city point using mapzen', sys.exc_info(), data={"username": username, "orgname": orgname}) + raise Exception('Error trying to geocode city point using mapzen') finally: quota_service.increment_total_service_use() $$ LANGUAGE plpythonu; ----- cdb_geocode_namedplace_point(city_name text, admin1_name text, country_name text) -CREATE OR REPLACE FUNCTION cdb_dataservices_server.cdb_geocode_namedplace_point(username text, orgname text, city_name text, admin1_name text, country_name text) +CREATE OR REPLACE FUNCTION cdb_dataservices_server._cdb_internal_geocode_namedplace(username text, orgname text, city_name text, admin1_name text DEFAULT NULL, country_name text DEFAULT NULL) RETURNS Geometry AS $$ from cartodb_services.metrics import QuotaService from cartodb_services.metrics import InternalGeocoderConfig @@ -86,8 +91,15 @@ RETURNS Geometry AS $$ logger = Logger(logger_config) quota_service = QuotaService(user_geocoder_config, redis_conn) try: - plan = plpy.prepare("SELECT cdb_dataservices_server._cdb_geocode_namedplace_point(trim($1), trim($2), trim($3)) AS mypoint", ["text", "text", "text"]) - rv = plpy.execute(plan, [city_name, admin1_name, country_name], 1) + if admin1_name and country_name: + plan = plpy.prepare("SELECT cdb_dataservices_server._cdb_geocode_namedplace_point(trim($1), trim($2), trim($3)) AS mypoint", ["text", "text", "text"]) + rv = plpy.execute(plan, [city_name, plpy.quote_nullable(admin1_name), plpy.quote_nullable(country_name)], 1) + elif country_name: + plan = plpy.prepare("SELECT cdb_dataservices_server._cdb_geocode_namedplace_point(trim($1), trim($2)) AS mypoint", ["text", "text"]) + rv = plpy.execute(plan, [city_name, plpy.quote_nullable(country_name)], 1) + else: + plan = plpy.prepare("SELECT cdb_dataservices_server._cdb_geocode_namedplace_point(trim($1)) AS mypoint", ["text"]) + rv = plpy.execute(plan, [city_name], 1) result = rv[0]["mypoint"] if result: quota_service.increment_success_service_use() diff --git a/server/lib/python/cartodb_services/cartodb_services/mapzen/geocoder.py b/server/lib/python/cartodb_services/cartodb_services/mapzen/geocoder.py index 235db94..84ae4bd 100644 --- a/server/lib/python/cartodb_services/cartodb_services/mapzen/geocoder.py +++ b/server/lib/python/cartodb_services/cartodb_services/mapzen/geocoder.py @@ -18,10 +18,10 @@ class MapzenGeocoder: self._logger = logger @qps_retry - def geocode(self, searchtext, city=None, state_province=None, country=None): + def geocode(self, searchtext, city=None, state_province=None, country=None, search_type=None): request_params = self._build_requests_parameters(searchtext, city, state_province, - country) + country, search_type) try: response = requests.get(self._url, params=request_params) if response.status_code == requests.codes.ok: @@ -47,13 +47,14 @@ class MapzenGeocoder: def _build_requests_parameters(self, searchtext, city=None, - state_province=None, country=None): + state_province=None, country=None, + search_type=None): request_params = {} search_string = self._build_search_text(searchtext.strip(), city, state_province) request_params['text'] = search_string - request_params['layers'] = 'address' + request_params['layers'] = search_type if search_type else 'address' request_params['api_key'] = self._app_key if country: request_params['boundary.country'] = country diff --git a/server/lib/python/cartodb_services/cartodb_services/metrics/config.py b/server/lib/python/cartodb_services/cartodb_services/metrics/config.py index 4d22332..d6e7271 100644 --- a/server/lib/python/cartodb_services/cartodb_services/metrics/config.py +++ b/server/lib/python/cartodb_services/cartodb_services/metrics/config.py @@ -286,11 +286,11 @@ class GeocoderConfig(ServiceConfig): PERIOD_END_DATE = 'period_end_date' DEFAULT_PROVIDER = 'mapzen' - def __init__(self, redis_connection, db_conn, username, orgname=None): + def __init__(self, redis_connection, db_conn, username, orgname=None, forced_provider=None): super(GeocoderConfig, self).__init__(redis_connection, db_conn, username, orgname) filtered_config = {key: self._redis_config[key] for key in self.GEOCODER_CONFIG_KEYS if key in self._redis_config.keys()} - self.__parse_config(filtered_config, self._db_config) + self.__parse_config(filtered_config, self._db_config, forced_provider) self.__check_config(filtered_config) def __check_config(self, filtered_config): @@ -307,9 +307,12 @@ class GeocoderConfig(ServiceConfig): return True - def __parse_config(self, filtered_config, db_config): - self._geocoder_provider = filtered_config[self.GEOCODER_PROVIDER].lower() - if not self._geocoder_provider: + def __parse_config(self, filtered_config, db_config, forced_provider): + if forced_provider: + self._geocoder_provider = forced_provider + elif filtered_config[self.GEOCODER_PROVIDER].lower(): + self._geocoder_provider = filtered_config[self.GEOCODER_PROVIDER].lower() + else: self._geocoder_provider = self.DEFAULT_PROVIDER self._geocoding_quota = float(filtered_config[self.QUOTA_KEY]) self._period_end_date = date_parse(filtered_config[self.PERIOD_END_DATE]) diff --git a/server/lib/python/cartodb_services/setup.py b/server/lib/python/cartodb_services/setup.py index 04e18a2..d01c585 100644 --- a/server/lib/python/cartodb_services/setup.py +++ b/server/lib/python/cartodb_services/setup.py @@ -10,7 +10,7 @@ from setuptools import setup, find_packages setup( name='cartodb_services', - version='0.7.4.2', + version='0.8', description='CartoDB Services API Python Library',