From da78b0bc654986679a9599577a2740b6822cbdc5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Ignacio=20S=C3=A1nchez=20Lara?= Date: Wed, 11 Jul 2018 12:28:39 +0200 Subject: [PATCH] Fix batching with negatives cartodb_id --- client/cdb_dataservices_client--0.25.0.sql | 13 +++++++------ client/sql/21_bulk_geocoding_functions.sql | 13 +++++++------ test/integration/test_street_functions.py | 9 +++++++-- 3 files changed, 21 insertions(+), 14 deletions(-) diff --git a/client/cdb_dataservices_client--0.25.0.sql b/client/cdb_dataservices_client--0.25.0.sql index 2bed351..af58ce2 100644 --- a/client/cdb_dataservices_client--0.25.0.sql +++ b/client/cdb_dataservices_client--0.25.0.sql @@ -1995,6 +1995,8 @@ DECLARE enough_quota boolean; remaining_quota integer; max_batch_size integer; + max_cartodb_id integer; + min_cartodb_id integer; cartodb_id_batch integer; batches_n integer; @@ -2020,7 +2022,8 @@ BEGIN batch_size := MAX_SAFE_BATCH_SIZE; END IF; - EXECUTE format('SELECT COUNT(1) from (%s) _x', query) INTO query_row_count; + EXECUTE format('SELECT count(1), max(cartodb_id), min(cartodb_id), ceil((max(cartodb_id) + 1 - min(cartodb_id))::float/%s) FROM (%s) _x', batch_size, query) + INTO query_row_count, max_cartodb_id, min_cartodb_id, batches_n; RAISE DEBUG 'cdb_bulk_geocode_street_point --> query_row_count: %; query: %; country: %; state: %; city: %; street: %', query_row_count, query, country_column, state_column, city_column, street_column; @@ -2029,8 +2032,6 @@ BEGIN RAISE EXCEPTION 'Remaining quota: %. Estimated cost: %', remaining_quota, query_row_count; END IF; - EXECUTE format('SELECT ceil(max(cartodb_id)::float/%s) FROM (%s) _x', batch_size, query) INTO batches_n; - RAISE DEBUG 'batches_n: %', batches_n; temp_table_name := 'bulk_geocode_street_' || md5(random()::text); @@ -2052,13 +2053,13 @@ BEGIN 'WITH geocoding_data as (' || ' SELECT ' || ' json_build_object(''id'', cartodb_id, ''address'', %s, ''city'', %s, ''state'', %s, ''country'', %s) as data , ' || - ' floor((cartodb_id-1)::float/$1) as batch' || + ' floor((cartodb_id - $1)::float/$2) as batch' || ' FROM (%s) _x' || ') ' || 'INSERT INTO %s SELECT (cdb_dataservices_client._cdb_bulk_geocode_street_point(jsonb_agg(data))).* ' || 'FROM geocoding_data ' || - 'WHERE batch = $2', street_column, city_column, state_column, country_column, query, temp_table_name) - USING batch_size, cartodb_id_batch; + 'WHERE batch = $3', street_column, city_column, state_column, country_column, query, temp_table_name) + USING min_cartodb_id, batch_size, cartodb_id_batch; GET DIAGNOSTICS current_row_count = ROW_COUNT; RAISE DEBUG 'Batch % --> %', cartodb_id_batch, current_row_count; diff --git a/client/sql/21_bulk_geocoding_functions.sql b/client/sql/21_bulk_geocoding_functions.sql index f9ba824..7ab3407 100644 --- a/client/sql/21_bulk_geocoding_functions.sql +++ b/client/sql/21_bulk_geocoding_functions.sql @@ -6,6 +6,8 @@ DECLARE enough_quota boolean; remaining_quota integer; max_batch_size integer; + max_cartodb_id integer; + min_cartodb_id integer; cartodb_id_batch integer; batches_n integer; @@ -31,7 +33,8 @@ BEGIN batch_size := MAX_SAFE_BATCH_SIZE; END IF; - EXECUTE format('SELECT COUNT(1) from (%s) _x', query) INTO query_row_count; + EXECUTE format('SELECT count(1), max(cartodb_id), min(cartodb_id), ceil((max(cartodb_id) + 1 - min(cartodb_id))::float/%s) FROM (%s) _x', batch_size, query) + INTO query_row_count, max_cartodb_id, min_cartodb_id, batches_n; RAISE DEBUG 'cdb_bulk_geocode_street_point --> query_row_count: %; query: %; country: %; state: %; city: %; street: %', query_row_count, query, country_column, state_column, city_column, street_column; @@ -40,8 +43,6 @@ BEGIN RAISE EXCEPTION 'Remaining quota: %. Estimated cost: %', remaining_quota, query_row_count; END IF; - EXECUTE format('SELECT ceil(max(cartodb_id)::float/%s) FROM (%s) _x', batch_size, query) INTO batches_n; - RAISE DEBUG 'batches_n: %', batches_n; temp_table_name := 'bulk_geocode_street_' || md5(random()::text); @@ -63,13 +64,13 @@ BEGIN 'WITH geocoding_data as (' || ' SELECT ' || ' json_build_object(''id'', cartodb_id, ''address'', %s, ''city'', %s, ''state'', %s, ''country'', %s) as data , ' || - ' floor((cartodb_id-1)::float/$1) as batch' || + ' floor((cartodb_id - $1)::float/$2) as batch' || ' FROM (%s) _x' || ') ' || 'INSERT INTO %s SELECT (cdb_dataservices_client._cdb_bulk_geocode_street_point(jsonb_agg(data))).* ' || 'FROM geocoding_data ' || - 'WHERE batch = $2', street_column, city_column, state_column, country_column, query, temp_table_name) - USING batch_size, cartodb_id_batch; + 'WHERE batch = $3', street_column, city_column, state_column, country_column, query, temp_table_name) + USING min_cartodb_id, batch_size, cartodb_id_batch; GET DIAGNOSTICS current_row_count = ROW_COUNT; RAISE DEBUG 'Batch % --> %', cartodb_id_batch, current_row_count; diff --git a/test/integration/test_street_functions.py b/test/integration/test_street_functions.py index cca42ca..43186b9 100644 --- a/test/integration/test_street_functions.py +++ b/test/integration/test_street_functions.py @@ -287,11 +287,14 @@ class TestBulkStreetFunctions(TestStreetFunctionsSetUp): Useful just to test a good batch size """ n = 110 + first_cartodb_id = -1 + first_street_number = 1 batch_size = 'NULL' # NULL for optimal streets = [] for i in range(0, n): streets.append('{{"cartodb_id": {}, "address": "{} Yonge Street, ' \ - 'Toronto, Canada"}}'.format(i, i)) + 'Toronto, Canada"}}'.format(first_cartodb_id + i, + first_street_number + i)) query = "select *, st_x(the_geom), st_y(the_geom) " \ "FROM cdb_dataservices_client.cdb_bulk_geocode_street_point( " \ @@ -300,7 +303,9 @@ class TestBulkStreetFunctions(TestStreetFunctionsSetUp): "]''::jsonb) as (cartodb_id integer, address text)', " \ "'address', null, null, null, {})".format(','.join(streets), batch_size) response = self._run_authenticated(query) - assert_equal(n - 1, len(response['rows'])) + assert_equal(n, len(response['rows'])) + for row in response['rows']: + assert_not_equal(row['st_x'], None) def test_missing_components_on_private_function(self): query = "SELECT _cdb_bulk_geocode_street_point(" \