CDB-2087 Add two new attributes to geocodings

processable_rows: number of rows on the table that could be affected
  by the geocoding job
real_rows: number of correctly geocoded rows (including duplicates) This
  is different from processed_rows, that is the number of processed
  geocoding strings as returned by HERE API
pull/394/head
David Arango 11 years ago
parent dc151f5a56
commit a108918b17

@ -60,9 +60,8 @@ class Api::Json::GeocodingsController < Api::ApplicationController
end
def estimation_for
dataset = current_user.in_database.select.from(params[:table_name])
dataset = dataset.where(cartodb_georef_status: nil) if dataset.columns.include?(:cartodb_georef_status)
total_rows = dataset.count
table = current_user.tables.where(name: params[:table_name]).first
total_rows = Geocoding.processable_rows(table)
remaining_blocks = (current_user.geocoding_quota - current_user.get_geocoding_calls) / User::GEOCODING_BLOCK_SIZE
remaining_blocks = (remaining_blocks > 0 ? remaining_blocks : 0)
needed_blocks = (total_rows / User::GEOCODING_BLOCK_SIZE) - remaining_blocks

@ -8,7 +8,7 @@ class Geocoding < Sequel::Model
ALLOWED_KINDS = %w(admin0 admin1 namedplace postalcode high-resolution ipaddress)
PUBLIC_ATTRIBUTES = [:id, :table_id, :state, :kind, :country_code, :formatter, :geometry_type,
:error, :processed_rows, :cache_hits]
:error, :processed_rows, :cache_hits, :processable_rows, :real_rows]
many_to_one :user
many_to_one :table
@ -68,28 +68,37 @@ class Geocoding < Sequel::Model
end # cancel
def run!
self.update(state: 'started')
self.update state: 'started', processable_rows: self.class.processable_rows(table)
rows_geocoded_before = table.owner.in_database.select.from(table.name).where(cartodb_georef_status: true).count rescue 0
table_geocoder.run
self.update remote_id: table_geocoder.remote_id
started = Time.now
begin
self.update(table_geocoder.update_geocoding_status)
puts "Processed: #{processed_rows}"
raise "Geocoding timeout" if Time.now - started > run_timeout and ['started', 'submitted', 'accepted'].include? state
raise 'Geocoding timeout' if Time.now - started > run_timeout and ['started', 'submitted', 'accepted'].include? state
raise 'Geocoding failed' if state == 'failed'
sleep(2)
end until ['completed', 'cancelled', 'failed'].include? state
end until ['completed', 'cancelled'].include? state
return false if state == 'cancelled'
self.update(cache_hits: table_geocoder.cache.hits) if table_geocoder.respond_to?(:cache)
Statsd.gauge("geocodings.requests", "+#{self.processed_rows}") rescue nil
Statsd.gauge("geocodings.cache_hits", "+#{self.cache_hits}") rescue nil
table_geocoder.process_results
table_geocoder.process_results if state == 'completed'
create_automatic_geocoding if automatic_geocoding_id.blank?
self.update(state: 'finished')
rows_geocoded_after = table.owner.in_database.select.from(table.name).where(cartodb_georef_status: true).count rescue 0
self.update(state: 'finished', real_rows: rows_geocoded_after - rows_geocoded_before)
rescue => e
self.update(state: 'failed', processed_rows: 0, cache_hits: 0)
CartoDB::notify_exception(e, user: user)
end # run!
def self.processable_rows(table)
dataset = table.owner.in_database.select.from(table.name)
dataset = dataset.where(cartodb_georef_status: nil) if dataset.columns.include?(:cartodb_georef_status)
dataset.count
end # self.processable_rows
def create_automatic_geocoding
# Disabled until we stop sending previously failed rows
# best way to do this: use append mode on synchronizations

@ -0,0 +1,7 @@
Sequel.migration do
change do
alter_table :geocodings do
add_column :processable_rows, :bigint
end
end
end

@ -0,0 +1,7 @@
Sequel.migration do
change do
alter_table :geocodings do
add_column :real_rows, :bigint
end
end
end

@ -31,12 +31,12 @@ describe Geocoding do
end
it 'returns an instance of InternalGeocoder when kind is not high-resolution' do
geocoding = FactoryGirl.build(:geocoding, user: @user, table: @table, kind: 'admin0')
geocoding = FactoryGirl.build(:geocoding, user: @user, table: @table, kind: 'admin0', geometry_type: 'point')
geocoding.table_geocoder.should be_kind_of(CartoDB::InternalGeocoder)
end
it 'memoizes' do
geocoding = FactoryGirl.build(:geocoding, user: @user, table: @table, kind: 'admin0')
geocoding = FactoryGirl.build(:geocoding, user: @user, table: @table, kind: 'admin0', geometry_type: 'point')
geocoder = geocoding.table_geocoder
geocoder.should be_kind_of(CartoDB::InternalGeocoder)
geocoder.should eq geocoding.table_geocoder
@ -47,11 +47,13 @@ describe Geocoding do
let(:geocoding) { FactoryGirl.build(:geocoding, user: @user, table: @table) }
it 'validates formatter' do
geocoding.raise_on_save_failure = true
expect { geocoding.save }.to raise_error(Sequel::ValidationFailed)
geocoding.errors[:formatter].join(',').should match /is not present/
end
it 'validates kind' do
geocoding.raise_on_save_failure = true
geocoding.kind = 'nonsense'
expect { geocoding.save }.to raise_error(Sequel::ValidationFailed)
geocoding.errors[:kind].join(',').should match /is not in range or set/
@ -83,7 +85,7 @@ describe Geocoding do
end
describe '#run!' do
it 'updates processed_rows, cache_hits and state' do
it 'updates geocoding stats' do
geocoding = FactoryGirl.create(:geocoding, user: @user, table: @table, formatter: 'b')
geocoding.table_geocoder.stubs(:run).returns true
geocoding.table_geocoder.stubs(:cache).returns OpenStruct.new(hits: 5)
@ -137,7 +139,7 @@ describe Geocoding do
@user.stubs('hard_geocoding_limit?').returns(true)
delete_user_data @user
geocoding.max_geocodable_rows.should eq 200
FactoryGirl.create(:geocoding, user: @user, processed_rows: 100)
FactoryGirl.create(:geocoding, user: @user, processed_rows: 100, remote_id: 'wadus')
geocoding.max_geocodable_rows.should eq 100
end

@ -64,7 +64,7 @@ describe "Geocodings API" do
describe 'GET /api/v1/geocodings' do
it 'returns every geocoding belonging to current_user' do
FactoryGirl.create(:geocoding, table_name: 'a', formatter: 'b', user: @user, state: 'wadus')
FactoryGirl.create(:geocoding, table_name: 'a', formatter: 'b', user_id: @user.id+1)
FactoryGirl.create(:geocoding, table_name: 'a', formatter: 'b', user_id: UUIDTools::UUID.timestamp_create.to_s)
get_json v1_geocodings_url(params) do |response|
response.status.should be_success
response.body[:geocodings].size.should == 1
@ -74,8 +74,8 @@ describe "Geocodings API" do
describe 'GET /api/v1/geocodings/:id' do
it 'returns a geocoding' do
geocoding = FactoryGirl.create(:geocoding, table_id: 1, formatter: 'b', user: @user)
FactoryGirl.create(:geocoding, table_id: 2, formatter: 'b', user_id: @user.id+1)
geocoding = FactoryGirl.create(:geocoding, table_id: UUIDTools::UUID.timestamp_create.to_s, formatter: 'b', user: @user)
FactoryGirl.create(:geocoding, table_id: UUIDTools::UUID.timestamp_create.to_s, formatter: 'b', user_id: UUIDTools::UUID.timestamp_create.to_s)
get_json v1_geocoding_url(params.merge(id: geocoding.id)) do |response|
response.status.should be_success
@ -84,7 +84,7 @@ describe "Geocodings API" do
end
it 'does not return a geocoding owned by another user' do
geocoding = FactoryGirl.create(:geocoding, table_id: 1, formatter: 'b', user_id: @user.id + 1)
geocoding = FactoryGirl.create(:geocoding, table_id: UUIDTools::UUID.timestamp_create.to_s, formatter: 'b', user_id: UUIDTools::UUID.timestamp_create.to_s)
get_json v1_geocoding_url(params.merge(id: geocoding.id)) do |response|
response.status.should eq 404
@ -94,7 +94,7 @@ describe "Geocodings API" do
describe 'PUT /api/v1/geocodings/:id' do
it 'cancels a geocoding job' do
geocoding = FactoryGirl.create(:geocoding, table_id: 2, formatter: 'b', user: @user)
geocoding = FactoryGirl.create(:geocoding, table_id: UUIDTools::UUID.timestamp_create.to_s, formatter: 'b', user: @user)
Geocoding.any_instance.stubs(:cancel).returns(true)
put_json v1_geocoding_url(params.merge(id: geocoding.id)), { state: 'cancelled' } do |response|
@ -105,7 +105,7 @@ describe "Geocodings API" do
end
it 'fails gracefully on job cancel failure' do
geocoding = FactoryGirl.create(:geocoding, table_id: 1, formatter: 'b', user: @user)
geocoding = FactoryGirl.create(:geocoding, table_id: UUIDTools::UUID.timestamp_create.to_s, formatter: 'b', user: @user)
Geocoding.any_instance.stubs(:cancel).raises('wadus')
put_json v1_geocoding_url(params.merge(id: geocoding.id)), { state: 'cancelled' } do |response|

Loading…
Cancel
Save