Fix auto guessing when importing CSV

pull/16326/head
moicalcob 3 years ago
parent ed77caf6da
commit 072a1e2d58

@ -295,7 +295,9 @@ module CartoDB
job.log "Fallback: Disabling autoguessing because there are wrong dates in the source file" job.log "Fallback: Disabling autoguessing because there are wrong dates in the source file"
@job.fallback_executed = "date" @job.fallback_executed = "date"
ogr2ogr.overwrite = true ogr2ogr.overwrite = true
ogr2ogr.csv_guessing = false ogr2ogr.csv_guessing = true
ogr2ogr.quoted_fields_guessing = false
try_fix_invalid_field(ogr2ogr.filepath, ogr2ogr.command_output)
ogr2ogr.run(append_mode) ogr2ogr.run(append_mode)
elsif ogr2ogr.encoding_error? elsif ogr2ogr.encoding_error?
job.log "Fallback: There is an encoding problem, trying with ISO-8859-1" job.log "Fallback: There is an encoding problem, trying with ISO-8859-1"
@ -385,6 +387,14 @@ module CartoDB
def statement_timeout?(error) def statement_timeout?(error)
error =~ /canceling statement due to statement timeout/i error =~ /canceling statement due to statement timeout/i
end end
# Rewrites the CSV at +filepath+ so that the single field reported as invalid
# in +command_output+ is wrapped in literal double quotes.
#
# The position is taken from the first "line <N>, column <name>:" fragment
# found in +command_output+.
# NOTE(review): presumably this is the CONTEXT line of a PostgreSQL COPY error
# relayed by ogr2ogr -- confirm against real output.
# <N> is treated as a 1-based data-row number (the header row is not counted).
#
# When no position can be extracted, or it points at a row, column or value
# that does not exist, the file is left untouched instead of raising, so the
# caller can still carry on with its retry.
#
# @param filepath [String] path of the CSV file; it is overwritten in place
# @param command_output [String, nil] textual output of the failed command
# @return [nil]
def try_fix_invalid_field(filepath, command_output)
  position = command_output.to_s.match(/\bline\s+(\d+)\s*,\s*column\s+([^:\n]+?)\s*:/)
  return unless position

  row_index = position[1].to_i - 1
  # A negative index would silently address rows from the end of the table.
  return if row_index < 0

  column = position[2]
  csv_content = CSV.read(filepath, headers: true)
  row = csv_content[row_index]
  value = row && row[column]
  # Nothing to quote; also prevents `row[column] =` from appending a new column.
  return if value.nil?

  # The CSV writer escapes these quotes, so the stored value keeps them literally.
  row[column] = "\"#{value}\""
  File.write(filepath, csv_content.to_s)
  nil
end
end end
end end
end end

@ -151,13 +151,13 @@ module CartoDB
end end
attr_accessor :append_mode, :filepath, :csv_guessing, :overwrite, :encoding, :shape_encoding, attr_accessor :append_mode, :filepath, :csv_guessing, :overwrite, :encoding, :shape_encoding,
:shape_coordinate_system, :memory_limit :shape_coordinate_system, :memory_limit, :quoted_fields_guessing
attr_reader :exit_code, :command_output attr_reader :exit_code, :command_output
private private
attr_writer :exit_code, :command_output attr_writer :exit_code, :command_output
attr_accessor :pg_options, :options, :table_name, :layer, :ogr2ogr_binary, :quoted_fields_guessing attr_accessor :pg_options, :options, :table_name, :layer, :ogr2ogr_binary
def is_csv? def is_csv?
!(filepath =~ /\.csv$/i).nil? !(filepath =~ /\.csv$/i).nil?

@ -374,9 +374,21 @@ describe 'csv regression tests' do
runner = runner_with_fixture('wrong_date.csv', nil, true) runner = runner_with_fixture('wrong_date.csv', nil, true)
runner.run runner.run
byebug
runner.results.first.success?.should eq true runner.results.first.success?.should eq true
end end
it 'auto guessing should work in csv columns if one field is causing troubles' do
  # Run the import for the fixture; the checks below only look at the first result.
  importer = runner_with_fixture('wrong_date_auto_guessing.csv', nil, true)
  importer.run
  imported = importer.results.first

  # Fetch the first row of the table reported by that result.
  first_row = @user.in_database[%Q{
SELECT *
from #{imported.schema}.#{imported.table_name}
}].first

  # The spec passes when the :age value comes back as an Integer.
  first_row.fetch(:age).class.should eq(Integer)
end
def sample_for(job) def sample_for(job)
job.db[%Q{ job.db[%Q{
SELECT * SELECT *

@ -0,0 +1,12 @@
age,date,name
20,2000-01-01,name_1
21,2000-01-01,name_2
22,2000-1-NA,name_3
23,2000-01-01,name_4
24,2000-1-NA,name_5
25,2000-01-01,name_6
26,2000-1-NA,name_7
27,2000-01-01,name_8
28,2000-01-01,name_9
29,2000-1-NA,name_10
30,2000-01-01,name_11
1 age date name
2 20 2000-01-01 name_1
3 21 2000-01-01 name_2
4 22 2000-1-NA name_3
5 23 2000-01-01 name_4
6 24 2000-1-NA name_5
7 25 2000-01-01 name_6
8 26 2000-1-NA name_7
9 27 2000-01-01 name_8
10 28 2000-01-01 name_9
11 29 2000-1-NA name_10
12 30 2000-01-01 name_11
Loading…
Cancel
Save