Fixes an issue with bad latin1 encodings in dbf files

2.0
Fernando Espinosa 12 years ago
parent 55eb04f6a8
commit 7467096e52

@ -73,7 +73,7 @@ group :development, :test do
gem "mocha", "0.10.5"
# TODO fix debugger readline conflicts with resque
#gem "debugger", "1.2.0"
gem "debugger", "1.2.0"
gem "steak", "2.0.0"
gem "rspec-rails", "2.10.1"
@ -84,4 +84,5 @@ group :development, :test do
gem "rack-reverse-proxy", "0.4.4", :require => 'rack/reverse_proxy'
gem "foreman", "0.46.0"
gem "aws-sdk", "1.5.2"
gem "factory_girl_rails", "~> 4.0.0"
end

@ -58,12 +58,20 @@ GEM
ffi (~> 1.0, >= 1.0.6)
choice (0.1.6)
chunky_png (1.2.6)
columnize (0.3.6)
compass (0.12.2)
chunky_png (~> 1.2)
fssm (>= 0.2.7)
sass (~> 3.1)
compass-rails (1.0.3)
compass (>= 0.12.2, < 0.14)
debugger (1.2.0)
columnize (>= 0.3.1)
debugger-linecache (~> 1.1.1)
debugger-ruby_core_source (~> 1.1.3)
debugger-linecache (1.1.2)
debugger-ruby_core_source (>= 1.1.1)
debugger-ruby_core_source (1.1.3)
diff-lcs (1.1.3)
ejs (1.1.1)
email_spec (1.2.1)
@ -73,6 +81,11 @@ GEM
eventmachine (0.12.10)
execjs (1.4.0)
multi_json (~> 1.0)
factory_girl (4.0.0)
activesupport (>= 3.0.0)
factory_girl_rails (4.0.0)
factory_girl (~> 4.0.0)
railties (>= 3.0.0)
faraday (0.8.4)
multipart-post (~> 1.1)
faye-websocket (0.4.6)
@ -278,8 +291,10 @@ DEPENDENCIES
chunky_png (~> 1.2.6)
compass (~> 0.12.1)
compass-rails (~> 1.0.1)
debugger (= 1.2.0)
ejs (~> 1.1.1)
email_spec (= 1.2.1)
factory_girl_rails (~> 4.0.0)
foreman (= 0.46.0)
google-spreadsheet-ruby (= 0.1.8)
htmlentities (= 4.3.1)

@ -1 +1 @@
Subproject commit dd485bafd679fd078379c17be86480faa89baab2
Subproject commit a21dc2e074277fdea914af317ae5f2b4e3b2a552

@ -39,7 +39,7 @@ def get_spatial_reference(shapefile):
if not proj4:
#print 'Failed to convert prj of %s, giving up...' % shpfile
else:
#print 'Second try assuming ESRI wkt worked for %s!' % shpfile
#print 'Second try assuming ESRI wkt worked for %s!' % shpfile
"""
srs.from_esri = True
else:
@ -83,7 +83,7 @@ if os.path.isfile(prj_file):
if jres['codes']:
srid = int(jres['codes'][0]['code'])
except:
srid=None # ensure set back to 4326 whatever happens
srid=None # ensure set back to 4326 whatever happens
try:
# Try to detect the encoding
@ -101,17 +101,21 @@ try:
detector = UniversalDetector()
# 100 rows should be enough to figure encoding
# TODO: broader and more automated testing; allow
# TODO: broader and more automated testing; allow
# setting limit by command line param
for row in itertools.islice(db, 100):
# Feed detector with concatenated string fields
detector.feed( ''.join(row[fno] for fno in sfields) )
if detector.done: break
if detector.done: break
dbf.close()
detector.close()
encoding = detector.result["encoding"]
if encoding=="ascii":
encoding="LATIN1" # why not UTF8 here ?
# There are problems detecting LATIN1 encodings:
# the detector reports KOI8-R instead of LATIN1
if encoding=="KOI8-R":
encoding="LATIN1"
except Exception as err:
encoding="UTF8" # why not UTF8 here ?
#sys.stderr.write(repr(err)+'\n')

@ -390,6 +390,17 @@ describe CartoDB::Importer do
results[0].import_type.should == '.shp'
end
end
# Spec for importing the "Municipios.zip" fixture, whose DBF is described by
# the example name as having a wrong encoding. The import must still produce
# the expected table and keep non-ASCII text readable.
it "should import dbf with wrong encoding" do
importer = create_importer "Municipios.zip"
results, errors = importer.import!
# The table must exist and the first result must describe it.
@db.tables.should include(:cb_municipios_5000_e)
results[0].name.should == 'cb_municipios_5000_e'
results[0].rows_imported.should == 258
results[0].import_type.should == '.shp'
# The first row's :comarca value must come back with its "Ñ" intact;
# presumably a mis-decoded DBF would garble this character.
@db.select(:comarca).from(:cb_municipios_5000_e).all.first[:comarca].should be == 'MONTAÑA ALAVESA'
end
end
context "expected error results" do

Binary file not shown.
Loading…
Cancel
Save