Fix batching with negatives cartodb_id
This commit is contained in:
parent
d46d51c3bb
commit
da78b0bc65
@ -1995,6 +1995,8 @@ DECLARE
|
||||
enough_quota boolean;
|
||||
remaining_quota integer;
|
||||
max_batch_size integer;
|
||||
max_cartodb_id integer;
|
||||
min_cartodb_id integer;
|
||||
|
||||
cartodb_id_batch integer;
|
||||
batches_n integer;
|
||||
@ -2020,7 +2022,8 @@ BEGIN
|
||||
batch_size := MAX_SAFE_BATCH_SIZE;
|
||||
END IF;
|
||||
|
||||
EXECUTE format('SELECT COUNT(1) from (%s) _x', query) INTO query_row_count;
|
||||
EXECUTE format('SELECT count(1), max(cartodb_id), min(cartodb_id), ceil((max(cartodb_id) + 1 - min(cartodb_id))::float/%s) FROM (%s) _x', batch_size, query)
|
||||
INTO query_row_count, max_cartodb_id, min_cartodb_id, batches_n;
|
||||
|
||||
RAISE DEBUG 'cdb_bulk_geocode_street_point --> query_row_count: %; query: %; country: %; state: %; city: %; street: %',
|
||||
query_row_count, query, country_column, state_column, city_column, street_column;
|
||||
@ -2029,8 +2032,6 @@ BEGIN
|
||||
RAISE EXCEPTION 'Remaining quota: %. Estimated cost: %', remaining_quota, query_row_count;
|
||||
END IF;
|
||||
|
||||
EXECUTE format('SELECT ceil(max(cartodb_id)::float/%s) FROM (%s) _x', batch_size, query) INTO batches_n;
|
||||
|
||||
RAISE DEBUG 'batches_n: %', batches_n;
|
||||
|
||||
temp_table_name := 'bulk_geocode_street_' || md5(random()::text);
|
||||
@ -2052,13 +2053,13 @@ BEGIN
|
||||
'WITH geocoding_data as (' ||
|
||||
' SELECT ' ||
|
||||
' json_build_object(''id'', cartodb_id, ''address'', %s, ''city'', %s, ''state'', %s, ''country'', %s) as data , ' ||
|
||||
' floor((cartodb_id-1)::float/$1) as batch' ||
|
||||
' floor((cartodb_id - $1)::float/$2) as batch' ||
|
||||
' FROM (%s) _x' ||
|
||||
') ' ||
|
||||
'INSERT INTO %s SELECT (cdb_dataservices_client._cdb_bulk_geocode_street_point(jsonb_agg(data))).* ' ||
|
||||
'FROM geocoding_data ' ||
|
||||
'WHERE batch = $2', street_column, city_column, state_column, country_column, query, temp_table_name)
|
||||
USING batch_size, cartodb_id_batch;
|
||||
'WHERE batch = $3', street_column, city_column, state_column, country_column, query, temp_table_name)
|
||||
USING min_cartodb_id, batch_size, cartodb_id_batch;
|
||||
|
||||
GET DIAGNOSTICS current_row_count = ROW_COUNT;
|
||||
RAISE DEBUG 'Batch % --> %', cartodb_id_batch, current_row_count;
|
||||
|
@ -6,6 +6,8 @@ DECLARE
|
||||
enough_quota boolean;
|
||||
remaining_quota integer;
|
||||
max_batch_size integer;
|
||||
max_cartodb_id integer;
|
||||
min_cartodb_id integer;
|
||||
|
||||
cartodb_id_batch integer;
|
||||
batches_n integer;
|
||||
@ -31,7 +33,8 @@ BEGIN
|
||||
batch_size := MAX_SAFE_BATCH_SIZE;
|
||||
END IF;
|
||||
|
||||
EXECUTE format('SELECT COUNT(1) from (%s) _x', query) INTO query_row_count;
|
||||
EXECUTE format('SELECT count(1), max(cartodb_id), min(cartodb_id), ceil((max(cartodb_id) + 1 - min(cartodb_id))::float/%s) FROM (%s) _x', batch_size, query)
|
||||
INTO query_row_count, max_cartodb_id, min_cartodb_id, batches_n;
|
||||
|
||||
RAISE DEBUG 'cdb_bulk_geocode_street_point --> query_row_count: %; query: %; country: %; state: %; city: %; street: %',
|
||||
query_row_count, query, country_column, state_column, city_column, street_column;
|
||||
@ -40,8 +43,6 @@ BEGIN
|
||||
RAISE EXCEPTION 'Remaining quota: %. Estimated cost: %', remaining_quota, query_row_count;
|
||||
END IF;
|
||||
|
||||
EXECUTE format('SELECT ceil(max(cartodb_id)::float/%s) FROM (%s) _x', batch_size, query) INTO batches_n;
|
||||
|
||||
RAISE DEBUG 'batches_n: %', batches_n;
|
||||
|
||||
temp_table_name := 'bulk_geocode_street_' || md5(random()::text);
|
||||
@ -63,13 +64,13 @@ BEGIN
|
||||
'WITH geocoding_data as (' ||
|
||||
' SELECT ' ||
|
||||
' json_build_object(''id'', cartodb_id, ''address'', %s, ''city'', %s, ''state'', %s, ''country'', %s) as data , ' ||
|
||||
' floor((cartodb_id-1)::float/$1) as batch' ||
|
||||
' floor((cartodb_id - $1)::float/$2) as batch' ||
|
||||
' FROM (%s) _x' ||
|
||||
') ' ||
|
||||
'INSERT INTO %s SELECT (cdb_dataservices_client._cdb_bulk_geocode_street_point(jsonb_agg(data))).* ' ||
|
||||
'FROM geocoding_data ' ||
|
||||
'WHERE batch = $2', street_column, city_column, state_column, country_column, query, temp_table_name)
|
||||
USING batch_size, cartodb_id_batch;
|
||||
'WHERE batch = $3', street_column, city_column, state_column, country_column, query, temp_table_name)
|
||||
USING min_cartodb_id, batch_size, cartodb_id_batch;
|
||||
|
||||
GET DIAGNOSTICS current_row_count = ROW_COUNT;
|
||||
RAISE DEBUG 'Batch % --> %', cartodb_id_batch, current_row_count;
|
||||
|
@ -287,11 +287,14 @@ class TestBulkStreetFunctions(TestStreetFunctionsSetUp):
|
||||
Useful just to test a good batch size
|
||||
"""
|
||||
n = 110
|
||||
first_cartodb_id = -1
|
||||
first_street_number = 1
|
||||
batch_size = 'NULL' # NULL for optimal
|
||||
streets = []
|
||||
for i in range(0, n):
|
||||
streets.append('{{"cartodb_id": {}, "address": "{} Yonge Street, ' \
|
||||
'Toronto, Canada"}}'.format(i, i))
|
||||
'Toronto, Canada"}}'.format(first_cartodb_id + i,
|
||||
first_street_number + i))
|
||||
|
||||
query = "select *, st_x(the_geom), st_y(the_geom) " \
|
||||
"FROM cdb_dataservices_client.cdb_bulk_geocode_street_point( " \
|
||||
@ -300,7 +303,9 @@ class TestBulkStreetFunctions(TestStreetFunctionsSetUp):
|
||||
"]''::jsonb) as (cartodb_id integer, address text)', " \
|
||||
"'address', null, null, null, {})".format(','.join(streets), batch_size)
|
||||
response = self._run_authenticated(query)
|
||||
assert_equal(n - 1, len(response['rows']))
|
||||
assert_equal(n, len(response['rows']))
|
||||
for row in response['rows']:
|
||||
assert_not_equal(row['st_x'], None)
|
||||
|
||||
def test_missing_components_on_private_function(self):
|
||||
query = "SELECT _cdb_bulk_geocode_street_point(" \
|
||||
|
Loading…
Reference in New Issue
Block a user