From 29c8d57fdddc685cf12b6fb747f5b677fa14fa2c Mon Sep 17 00:00:00 2001 From: Carla Iriberri Date: Fri, 22 Jan 2016 12:38:49 +0100 Subject: [PATCH 1/3] Improve empty CSV files handling For empty CSVs, which usually come from unsupported Excel files, such as sheets with graphs, the normalization was crashing. The first_line method was returning the `stream.rewind` value when the file was empty, and therefore first_line would be nil. This commit also changes the CSV column count check so that it accepts empty files (otherwise, nil.length would raise an exception). --- services/importer/lib/importer/csv_normalizer.rb | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/services/importer/lib/importer/csv_normalizer.rb b/services/importer/lib/importer/csv_normalizer.rb index 0dcd9cfa66..c123ce990c 100644 --- a/services/importer/lib/importer/csv_normalizer.rb +++ b/services/importer/lib/importer/csv_normalizer.rb @@ -169,7 +169,7 @@ module CartoDB end def single_column? 
- ::CSV.parse(first_line, csv_options).first.length < 2 + ::CSV.parse(first_line, csv_options).first.to_s.length < 2 end def multiple_column(row) @@ -208,8 +208,9 @@ module CartoDB def first_line return @first_line if @first_line stream.rewind - @first_line ||= stream.gets + @first_line ||= stream.gets || '' stream.rewind + @first_line end def release From d0851fa3142322a381c639b869ec20a679c9f8cc Mon Sep 17 00:00:00 2001 From: Carla Iriberri Date: Fri, 22 Jan 2016 13:17:46 +0100 Subject: [PATCH 2/3] Fix empty CSV header handling and add test --- lib/cartodb/import_error_codes.rb | 4 ++-- .../importer/lib/importer/csv_normalizer.rb | 7 ++++++- .../importer/spec/unit/csv_normalizer_spec.rb | 20 ++++++++++++++++++- 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/lib/cartodb/import_error_codes.rb b/lib/cartodb/import_error_codes.rb index 4a349d2f2c..4ecb8caf7e 100644 --- a/lib/cartodb/import_error_codes.rb +++ b/lib/cartodb/import_error_codes.rb @@ -40,8 +40,8 @@ module CartoDB source: ERROR_SOURCE_USER }, 1005 => { - title: 'Zero byte file', - what_about: "The file appears to have no information. Double check using a local tool such as QGIS that the file is indeed correct. If everything appears fine, try uploading it again or contact us.", + title: 'Empty file', + what_about: "The file appears to have no processable information. Double check that the file is indeed correct and it contains supported data. If everything appears fine, try uploading it again or contact us.", source: ERROR_SOURCE_USER }, 1006 => { diff --git a/services/importer/lib/importer/csv_normalizer.rb b/services/importer/lib/importer/csv_normalizer.rb index c123ce990c..731808a69d 100644 --- a/services/importer/lib/importer/csv_normalizer.rb +++ b/services/importer/lib/importer/csv_normalizer.rb @@ -169,7 +169,12 @@ module CartoDB end def single_column? - ::CSV.parse(first_line, csv_options).first.to_s.length < 2 + columns = ::CSV.parse(first_line, csv_options) + if !columns.any? 
+ raise EmptyFileError.new + else + columns.first.length < 2 + end end def multiple_column(row) diff --git a/services/importer/spec/unit/csv_normalizer_spec.rb b/services/importer/spec/unit/csv_normalizer_spec.rb index 80dacee42c..8820b0aea4 100644 --- a/services/importer/spec/unit/csv_normalizer_spec.rb +++ b/services/importer/spec/unit/csv_normalizer_spec.rb @@ -9,7 +9,7 @@ include CartoDB::Importer2::Doubles describe CartoDB::Importer2::CsvNormalizer do BUG_COLUMNS_WRONG_SPLIT_FIXTURE_FILE = "#{File.dirname(__FILE__)}/bug_columns_wrong_split.csv" - + describe '#run' do it 'transforms the file using a proper comma delimiter' do fixture = tab_delimiter_factory @@ -21,6 +21,16 @@ describe CartoDB::Importer2::CsvNormalizer do csv.run csv.delimiter.should eq ',' end + it 'raise if detects an empty file' do + fixture = empty_file_factory + + csv = CartoDB::Importer2::CsvNormalizer.new(fixture, Log.new) + expect { + csv.run + }.to raise_exception CartoDB::Importer2::EmptyFileError + + FileUtils.rm(fixture) + end end describe '#detect_delimiter' do @@ -215,6 +225,14 @@ describe CartoDB::Importer2::CsvNormalizer do filepath end + def empty_file_factory + filepath = get_temp_csv_fullpath + + FileUtils.touch(filepath) + + filepath + end + def quoted_string_with_delimiter_factory filepath = get_temp_csv_fullpath From 3817554755d9b75104bd0e9a4e984ff83cb05d44 Mon Sep 17 00:00:00 2001 From: Carla Iriberri Date: Fri, 22 Jan 2016 18:41:26 +0100 Subject: [PATCH 3/3] Style fixes --- services/importer/lib/importer/csv_normalizer.rb | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/services/importer/lib/importer/csv_normalizer.rb b/services/importer/lib/importer/csv_normalizer.rb index 731808a69d..a5097c4e67 100644 --- a/services/importer/lib/importer/csv_normalizer.rb +++ b/services/importer/lib/importer/csv_normalizer.rb @@ -170,11 +170,8 @@ module CartoDB def single_column? columns = ::CSV.parse(first_line, csv_options) - if !columns.any? 
- raise EmptyFileError.new - else - columns.first.length < 2 - end + raise EmptyFileError.new if !columns.any? + columns.first.length < 2 end def multiple_column(row)