Merge pull request #540 from CartoDB/538-multipolygon_geocoding_batch
538 multipolygon geocoding batch
This commit is contained in:
commit
4f5745988f
5
NEWS.md
5
NEWS.md
@ -1,3 +1,8 @@
|
||||
Dec 27th, 2018
|
||||
==============
|
||||
* Version `0.35.1` of the server extension and `0.26.1` of the client
|
||||
* Now the bulk geocoder returns `Point` geometries instead of MultiPolygons (#538)
|
||||
|
||||
Nov 30th, 2018
|
||||
==============
|
||||
* Version `0.35.0` of the server extension
|
||||
|
@ -7,7 +7,7 @@
|
||||
"carto_postgresql_ext": "^0.23.0"
|
||||
},
|
||||
"works_with": {
|
||||
"dataservices-api-server-extension": "^0.32.0"
|
||||
"dataservices-api-server-extension": "^0.35.1"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
99
client/cdb_dataservices_client--0.26.0--0.26.1.sql
Normal file
99
client/cdb_dataservices_client--0.26.0--0.26.1.sql
Normal file
@ -0,0 +1,99 @@
|
||||
--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES
|
||||
-- Complain if script is sourced in psql, rather than via CREATE EXTENSION
|
||||
\echo Use "ALTER EXTENSION cdb_dataservices_client UPDATE TO '0.26.1'" to load this file. \quit
|
||||
|
||||
-- Make sure we have a sane search path to create/update the extension
|
||||
SET search_path = "$user",cartodb,public,cdb_dataservices_client;
|
||||
|
||||
-- HERE goes your code to upgrade/downgrade
|
||||
ALTER TYPE cdb_dataservices_client.geocoding ALTER ATTRIBUTE the_geom TYPE geometry(Point,4326);
|
||||
|
||||
CREATE OR REPLACE FUNCTION cdb_dataservices_client.cdb_bulk_geocode_street_point (query text,
|
||||
street_column text, city_column text default null, state_column text default null, country_column text default null, batch_size integer DEFAULT NULL)
|
||||
RETURNS SETOF cdb_dataservices_client.geocoding AS $$
|
||||
DECLARE
|
||||
query_row_count integer;
|
||||
enough_quota boolean;
|
||||
remaining_quota integer;
|
||||
max_batch_size integer;
|
||||
|
||||
cartodb_id_batch integer;
|
||||
batches_n integer;
|
||||
DEFAULT_BATCH_SIZE CONSTANT numeric := 100;
|
||||
MAX_SAFE_BATCH_SIZE CONSTANT numeric := 5000;
|
||||
|
||||
temp_table_name text;
|
||||
username text;
|
||||
orgname text;
|
||||
apikey_permissions json;
|
||||
BEGIN
|
||||
SELECT u, o, p INTO username, orgname, apikey_permissions FROM cdb_dataservices_client._cdb_entity_config() AS (u text, o text, p json);
|
||||
IF apikey_permissions IS NULL OR NOT apikey_permissions::jsonb ? 'geocoding' THEN
|
||||
RAISE EXCEPTION 'Geocoding permission denied' USING ERRCODE = '01007';
|
||||
END IF;
|
||||
|
||||
-- JSON value stored "" is taken as literal
|
||||
IF username IS NULL OR username = '' OR username = '""' THEN
|
||||
RAISE EXCEPTION 'Username is a mandatory argument, check it out';
|
||||
END IF;
|
||||
|
||||
SELECT csqi.monthly_quota - csqi.used_quota AS remaining_quota, csqi.max_batch_size
|
||||
INTO remaining_quota, max_batch_size
|
||||
FROM cdb_dataservices_client.cdb_service_quota_info_batch() csqi
|
||||
WHERE service = 'hires_geocoder';
|
||||
RAISE DEBUG 'remaining_quota: %; max_batch_size: %', remaining_quota, max_batch_size;
|
||||
|
||||
IF batch_size IS NULL THEN
|
||||
batch_size := max_batch_size;
|
||||
ELSIF batch_size > max_batch_size THEN
|
||||
RAISE EXCEPTION 'batch_size must be lower than %', max_batch_size + 1;
|
||||
END IF;
|
||||
|
||||
IF batch_size > MAX_SAFE_BATCH_SIZE THEN
|
||||
batch_size := MAX_SAFE_BATCH_SIZE;
|
||||
END IF;
|
||||
|
||||
EXECUTE format('SELECT count(1), ceil(count(1)::float/%s) FROM (%s) _x', batch_size, query)
|
||||
INTO query_row_count, batches_n;
|
||||
|
||||
RAISE DEBUG 'cdb_bulk_geocode_street_point --> query_row_count: %; query: %; country: %; state: %; city: %; street: %',
|
||||
query_row_count, query, country_column, state_column, city_column, street_column;
|
||||
SELECT cdb_dataservices_client.cdb_enough_quota('hires_geocoder', query_row_count) INTO enough_quota;
|
||||
IF remaining_quota < query_row_count THEN
|
||||
RAISE EXCEPTION 'Remaining quota: %. Estimated cost: %', remaining_quota, query_row_count;
|
||||
END IF;
|
||||
|
||||
RAISE DEBUG 'batches_n: %', batches_n;
|
||||
|
||||
temp_table_name := 'bulk_geocode_street_' || md5(random()::text);
|
||||
|
||||
EXECUTE format('CREATE TEMPORARY TABLE %s ' ||
|
||||
'(cartodb_id integer, the_geom geometry(Point,4326), metadata jsonb)',
|
||||
temp_table_name);
|
||||
|
||||
select
|
||||
coalesce(street_column, ''''''), coalesce(city_column, ''''''),
|
||||
coalesce(state_column, ''''''), coalesce(country_column, '''''')
|
||||
into street_column, city_column, state_column, country_column;
|
||||
|
||||
IF batches_n > 0 THEN
|
||||
FOR cartodb_id_batch in 0..(batches_n - 1)
|
||||
LOOP
|
||||
EXECUTE format(
|
||||
'WITH geocoding_data as (' ||
|
||||
' SELECT ' ||
|
||||
' json_build_object(''id'', cartodb_id, ''address'', %s, ''city'', %s, ''state'', %s, ''country'', %s) as data , ' ||
|
||||
' floor((row_number() over () - 1)::float/$1) as batch' ||
|
||||
' FROM (%s) _x' ||
|
||||
') ' ||
|
||||
'INSERT INTO %s SELECT (cdb_dataservices_client._cdb_bulk_geocode_street_point(jsonb_agg(data))).* ' ||
|
||||
'FROM geocoding_data ' ||
|
||||
'WHERE batch = $2', street_column, city_column, state_column, country_column, query, temp_table_name)
|
||||
USING batch_size, cartodb_id_batch;
|
||||
|
||||
END LOOP;
|
||||
END IF;
|
||||
|
||||
RETURN QUERY EXECUTE 'SELECT * FROM ' || quote_ident(temp_table_name);
|
||||
END;
|
||||
$$ LANGUAGE 'plpgsql' SECURITY DEFINER VOLATILE PARALLEL UNSAFE;
|
99
client/cdb_dataservices_client--0.26.1--0.26.0.sql
Normal file
99
client/cdb_dataservices_client--0.26.1--0.26.0.sql
Normal file
@ -0,0 +1,99 @@
|
||||
--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES
|
||||
-- Complain if script is sourced in psql, rather than via CREATE EXTENSION
|
||||
\echo Use "ALTER EXTENSION cdb_dataservices_client UPDATE TO '0.26.0'" to load this file. \quit
|
||||
|
||||
-- Make sure we have a sane search path to create/update the extension
|
||||
SET search_path = "$user",cartodb,public,cdb_dataservices_client;
|
||||
|
||||
-- HERE goes your code to upgrade/downgrade
|
||||
ALTER TYPE cdb_dataservices_client.geocoding ALTER ATTRIBUTE the_geom TYPE geometry(MultiPolygon,4326);
|
||||
|
||||
CREATE OR REPLACE FUNCTION cdb_dataservices_client.cdb_bulk_geocode_street_point (query text,
|
||||
street_column text, city_column text default null, state_column text default null, country_column text default null, batch_size integer DEFAULT NULL)
|
||||
RETURNS SETOF cdb_dataservices_client.geocoding AS $$
|
||||
DECLARE
|
||||
query_row_count integer;
|
||||
enough_quota boolean;
|
||||
remaining_quota integer;
|
||||
max_batch_size integer;
|
||||
|
||||
cartodb_id_batch integer;
|
||||
batches_n integer;
|
||||
DEFAULT_BATCH_SIZE CONSTANT numeric := 100;
|
||||
MAX_SAFE_BATCH_SIZE CONSTANT numeric := 5000;
|
||||
|
||||
temp_table_name text;
|
||||
username text;
|
||||
orgname text;
|
||||
apikey_permissions json;
|
||||
BEGIN
|
||||
SELECT u, o, p INTO username, orgname, apikey_permissions FROM cdb_dataservices_client._cdb_entity_config() AS (u text, o text, p json);
|
||||
IF apikey_permissions IS NULL OR NOT apikey_permissions::jsonb ? 'geocoding' THEN
|
||||
RAISE EXCEPTION 'Geocoding permission denied' USING ERRCODE = '01007';
|
||||
END IF;
|
||||
|
||||
-- JSON value stored "" is taken as literal
|
||||
IF username IS NULL OR username = '' OR username = '""' THEN
|
||||
RAISE EXCEPTION 'Username is a mandatory argument, check it out';
|
||||
END IF;
|
||||
|
||||
SELECT csqi.monthly_quota - csqi.used_quota AS remaining_quota, csqi.max_batch_size
|
||||
INTO remaining_quota, max_batch_size
|
||||
FROM cdb_dataservices_client.cdb_service_quota_info_batch() csqi
|
||||
WHERE service = 'hires_geocoder';
|
||||
RAISE DEBUG 'remaining_quota: %; max_batch_size: %', remaining_quota, max_batch_size;
|
||||
|
||||
IF batch_size IS NULL THEN
|
||||
batch_size := max_batch_size;
|
||||
ELSIF batch_size > max_batch_size THEN
|
||||
RAISE EXCEPTION 'batch_size must be lower than %', max_batch_size + 1;
|
||||
END IF;
|
||||
|
||||
IF batch_size > MAX_SAFE_BATCH_SIZE THEN
|
||||
batch_size := MAX_SAFE_BATCH_SIZE;
|
||||
END IF;
|
||||
|
||||
EXECUTE format('SELECT count(1), ceil(count(1)::float/%s) FROM (%s) _x', batch_size, query)
|
||||
INTO query_row_count, batches_n;
|
||||
|
||||
RAISE DEBUG 'cdb_bulk_geocode_street_point --> query_row_count: %; query: %; country: %; state: %; city: %; street: %',
|
||||
query_row_count, query, country_column, state_column, city_column, street_column;
|
||||
SELECT cdb_dataservices_client.cdb_enough_quota('hires_geocoder', query_row_count) INTO enough_quota;
|
||||
IF remaining_quota < query_row_count THEN
|
||||
RAISE EXCEPTION 'Remaining quota: %. Estimated cost: %', remaining_quota, query_row_count;
|
||||
END IF;
|
||||
|
||||
RAISE DEBUG 'batches_n: %', batches_n;
|
||||
|
||||
temp_table_name := 'bulk_geocode_street_' || md5(random()::text);
|
||||
|
||||
EXECUTE format('CREATE TEMPORARY TABLE %s ' ||
|
||||
'(cartodb_id integer, the_geom geometry(Multipolygon,4326), metadata jsonb)',
|
||||
temp_table_name);
|
||||
|
||||
select
|
||||
coalesce(street_column, ''''''), coalesce(city_column, ''''''),
|
||||
coalesce(state_column, ''''''), coalesce(country_column, '''''')
|
||||
into street_column, city_column, state_column, country_column;
|
||||
|
||||
IF batches_n > 0 THEN
|
||||
FOR cartodb_id_batch in 0..(batches_n - 1)
|
||||
LOOP
|
||||
EXECUTE format(
|
||||
'WITH geocoding_data as (' ||
|
||||
' SELECT ' ||
|
||||
' json_build_object(''id'', cartodb_id, ''address'', %s, ''city'', %s, ''state'', %s, ''country'', %s) as data , ' ||
|
||||
' floor((row_number() over () - 1)::float/$1) as batch' ||
|
||||
' FROM (%s) _x' ||
|
||||
') ' ||
|
||||
'INSERT INTO %s SELECT (cdb_dataservices_client._cdb_bulk_geocode_street_point(jsonb_agg(data))).* ' ||
|
||||
'FROM geocoding_data ' ||
|
||||
'WHERE batch = $2', street_column, city_column, state_column, country_column, query, temp_table_name)
|
||||
USING batch_size, cartodb_id_batch;
|
||||
|
||||
END LOOP;
|
||||
END IF;
|
||||
|
||||
RETURN QUERY EXECUTE 'SELECT * FROM ' || quote_ident(temp_table_name);
|
||||
END;
|
||||
$$ LANGUAGE 'plpgsql' SECURITY DEFINER VOLATILE PARALLEL UNSAFE;
|
5606
client/cdb_dataservices_client--0.26.1.sql
Normal file
5606
client/cdb_dataservices_client--0.26.1.sql
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,5 +1,5 @@
|
||||
comment = 'CartoDB dataservices client API extension'
|
||||
default_version = '0.26.0'
|
||||
default_version = '0.26.1'
|
||||
requires = 'plproxy, cartodb'
|
||||
superuser = true
|
||||
schema = cdb_dataservices_client
|
||||
|
@ -6,7 +6,7 @@ CREATE TYPE cdb_dataservices_client.isoline AS (
|
||||
|
||||
CREATE TYPE cdb_dataservices_client.geocoding AS (
|
||||
cartodb_id integer,
|
||||
the_geom geometry(Multipolygon,4326),
|
||||
the_geom geometry(Point,4326),
|
||||
metadata jsonb
|
||||
);
|
||||
|
||||
|
@ -58,7 +58,7 @@ BEGIN
|
||||
temp_table_name := 'bulk_geocode_street_' || md5(random()::text);
|
||||
|
||||
EXECUTE format('CREATE TEMPORARY TABLE %s ' ||
|
||||
'(cartodb_id integer, the_geom geometry(Multipolygon,4326), metadata jsonb)',
|
||||
'(cartodb_id integer, the_geom geometry(Point,4326), metadata jsonb)',
|
||||
temp_table_name);
|
||||
|
||||
select
|
||||
|
@ -0,0 +1,6 @@
|
||||
--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES
|
||||
-- Complain if script is sourced in psql, rather than via CREATE EXTENSION
|
||||
\echo Use "ALTER EXTENSION cdb_dataservices_server UPDATE TO '0.35.1'" to load this file. \quit
|
||||
|
||||
-- HERE goes your code to upgrade/downgrade
|
||||
ALTER TYPE cdb_dataservices_server.geocoding ALTER ATTRIBUTE the_geom TYPE geometry(Point,4326);
|
@ -0,0 +1,6 @@
|
||||
--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES
|
||||
-- Complain if script is sourced in psql, rather than via CREATE EXTENSION
|
||||
\echo Use "ALTER EXTENSION cdb_dataservices_server UPDATE TO '0.35.0'" to load this file. \quit
|
||||
|
||||
-- HERE goes your code to upgrade/downgrade
|
||||
ALTER TYPE cdb_dataservices_server.geocoding ALTER ATTRIBUTE the_geom TYPE geometry(MultiPolygon,4326);
|
3808
server/extension/cdb_dataservices_server--0.35.1.sql
Normal file
3808
server/extension/cdb_dataservices_server--0.35.1.sql
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,5 +1,5 @@
|
||||
comment = 'CartoDB dataservices server extension'
|
||||
default_version = '0.35.0'
|
||||
default_version = '0.35.1'
|
||||
requires = 'plpythonu, plproxy, postgis, cdb_geocoder'
|
||||
superuser = true
|
||||
schema = cdb_dataservices_server
|
||||
|
@ -1,7 +1,7 @@
|
||||
-- TODO: could cartodb_id be replaced by rowid, maybe needing extra care for offset?
|
||||
CREATE TYPE cdb_dataservices_server.geocoding AS (
|
||||
cartodb_id integer,
|
||||
the_geom geometry(Multipolygon,4326),
|
||||
the_geom geometry(Point,4326),
|
||||
metadata jsonb
|
||||
);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user