2018-06-22 18:19:51 +08:00
|
|
|
-- Bulk street geocoding entry point.
--
-- Runs `query`, splits its rows into batches by cartodb_id, sends each batch to
-- cdb_dataservices_client._cdb_bulk_geocode_street_point as a jsonb array and
-- returns every row that helper produced.
--
-- Parameters:
--   query          SQL text used as a subquery. It must expose a `cartodb_id`
--                  column, which drives the batching.
--   street_column, city_column, state_column, country_column
--                  SQL expressions (column names or literals) evaluated against
--                  `query` to build each address. NULL is replaced by the empty
--                  string literal ''.
--   batch_size     Width, in cartodb_id units, of each batch. Must be between 1
--                  and MAX_BATCH_SIZE.
--
-- Raises:
--   * when batch_size is NULL, lower than 1 or greater than MAX_BATCH_SIZE;
--   * when cdb_enough_quota reports that 'hires_geocoder' quota is not enough
--     for the number of rows returned by `query`.
--
-- NOTE(review): `query` and the *_column arguments are spliced verbatim (%s)
-- into dynamic SQL inside a SECURITY DEFINER function. That looks intentional
-- (they are SQL fragments, not identifiers) but means callers must be trusted
-- to pass only their own SQL -- confirm this is acceptable.
CREATE OR REPLACE FUNCTION cdb_dataservices_client.cdb_bulk_geocode_street_point (query text,
    street_column text, city_column text default null, state_column text default null, country_column text default null, batch_size integer DEFAULT 50)
RETURNS SETOF cdb_dataservices_client.geocoding AS $$
DECLARE
  query_row_count integer;
  enough_quota boolean;
  remaining_quota integer;

  cartodb_id_batch integer;
  batches_n integer;
  MAX_BATCH_SIZE CONSTANT numeric := 10000;
  current_row_count integer;

  temp_table_name text;
BEGIN
  IF batch_size IS NULL THEN
    RAISE EXCEPTION 'batch_size can''t be null';
  ELSIF batch_size < 1 THEN
    -- Zero would divide by zero below; a negative value would yield a negative
    -- batch count and silently return nothing.
    RAISE EXCEPTION 'batch_size must be greater than 0';
  ELSIF batch_size > MAX_BATCH_SIZE THEN
    RAISE EXCEPTION 'batch_size must be lower than %', MAX_BATCH_SIZE + 1;
  END IF;

  -- The number of input rows is used as the estimated cost for the quota check.
  EXECUTE format('SELECT COUNT(1) from (%s) _x', query) INTO query_row_count;

  RAISE DEBUG 'cdb_bulk_geocode_street_point --> query_row_count: %; query: %; country: %; state: %; city: %; street: %',
      query_row_count, query, country_column, state_column, city_column, street_column;

  SELECT cdb_dataservices_client.cdb_enough_quota('hires_geocoder', query_row_count) INTO enough_quota;
  -- A NULL answer is deliberately treated as "go ahead".
  IF enough_quota IS NOT NULL AND NOT enough_quota THEN
    SELECT csqi.monthly_quota - csqi.used_quota AS remaining_quota
    INTO remaining_quota
    FROM cdb_dataservices_client.cdb_service_quota_info() csqi
    WHERE csqi.service = 'hires_geocoder';
    RAISE EXCEPTION 'Remaining quota: %. Estimated cost: %', remaining_quota, query_row_count;
  END IF;

  -- Batch k holds cartodb_id in (k * batch_size, (k + 1) * batch_size].
  -- max() is NULL for an empty input (or when every cartodb_id is NULL); fall
  -- back to 0 batches so the FOR loop below gets a non-NULL upper bound.
  -- NOTE(review): rows with cartodb_id < 1 map to a negative batch number and
  -- are never sent to the geocoder -- presumably cartodb_id is always >= 1.
  EXECUTE format('SELECT coalesce(ceil(max(cartodb_id)::float/%s), 0) FROM (%s) _x', batch_size, query) INTO batches_n;

  RAISE DEBUG 'batches_n: %', batches_n;

  temp_table_name := 'bulk_geocode_street_' || md5(random()::text);

  -- NOTE(review): the geometry column is declared as Multipolygon although this
  -- is a point geocoder; left untouched because the definition of the
  -- `geocoding` type is not visible here -- confirm it matches.
  EXECUTE format('CREATE TEMPORARY TABLE %I ' ||
   '(cartodb_id integer, the_geom geometry(Multipolygon,4326), metadata jsonb)',
   temp_table_name);

  -- Missing address parts become the SQL literal '' once interpolated below.
  SELECT
    coalesce(street_column, ''''''), coalesce(city_column, ''''''),
    coalesce(state_column, ''''''), coalesce(country_column, '''''')
  INTO street_column, city_column, state_column, country_column;

  FOR cartodb_id_batch IN 0..(batches_n - 1)
  LOOP

    -- The helper is invoked through LATERAL in FROM instead of
    -- `SELECT (helper(...)).*`: the latter is expanded into one helper call per
    -- output column, i.e. several geocoding requests for the same batch.
    EXECUTE format(
      'WITH geocoding_data as (' ||
      '  SELECT ' ||
      '    json_build_object(''id'', cartodb_id, ''address'', %s, ''city'', %s, ''state'', %s, ''country'', %s) as data , ' ||
      '    floor((cartodb_id-1)::float/$1) as batch' ||
      '  FROM (%s) _x' ||
      '), batch_payload as (' ||
      '  SELECT jsonb_agg(data) as payload' ||
      '  FROM geocoding_data' ||
      '  WHERE batch = $2' ||
      ') ' ||
      'INSERT INTO %I ' ||
      'SELECT geocoded.* ' ||
      'FROM batch_payload, ' ||
      '  LATERAL cdb_dataservices_client._cdb_bulk_geocode_street_point(batch_payload.payload) geocoded',
      street_column, city_column, state_column, country_column, query, temp_table_name)
    USING batch_size, cartodb_id_batch;

    GET DIAGNOSTICS current_row_count = ROW_COUNT;
    RAISE DEBUG 'Batch % --> %', cartodb_id_batch, current_row_count;

  END LOOP;

  -- RETURN QUERY stores the rows in the function's result set and execution
  -- continues, so the scratch table can be dropped right afterwards instead of
  -- lingering for the rest of the session.
  RETURN QUERY EXECUTE format('SELECT cartodb_id, the_geom, metadata FROM %I', temp_table_name);
  EXECUTE format('DROP TABLE %I', temp_table_name);
END;
$$ LANGUAGE 'plpgsql' SECURITY DEFINER VOLATILE PARALLEL UNSAFE;
|