"""Guess the SRID and the attribute-table encoding of a shapefile.

Usage: python <script> shp_file name

Prints one comma-separated line to stdout: ``srid,encoding,shp_file,name``.
``srid`` is ``None`` when no .prj file sits next to the shapefile or no
authority code could be identified; ``encoding`` is the string ``None`` when
the .dbf file could not be read or analysed.
"""
import itertools
import json
import os.path
import subprocess
import sys
from urllib import urlencode
from urllib2 import urlopen

from chardet.universaldetector import UniversalDetector
from osgeo import osr

import dbfUtils


def get_spatial_reference(shapefile, prj_text=None):
    """Build an ``osr.SpatialReference`` from a shapefile's .prj definition.

    shapefile -- path to the .shp file; only used to locate the sibling
                 .prj file when ``prj_text`` is not supplied.
    prj_text  -- optional contents of the .prj file, if already read.

    The definition is first handed to ``SetFromUserInput``.  If that yields
    no PROJ.4 string, the text is re-imported line by line with
    ``ImportFromESRI``.  The returned object carries an extra ``from_esri``
    attribute telling which of the two paths was taken.
    """
    if prj_text is None:
        prj_path = os.path.splitext(shapefile)[0] + '.prj'
        with open(prj_path, 'r') as handle:
            prj_text = handle.read()

    srs = osr.SpatialReference()
    srs.SetFromUserInput(prj_text)

    if srs.ExportToProj4():
        srs.from_esri = False
    else:
        # Second try: hand the definition over as a list of lines, each
        # stripped of trailing whitespace.  (The previous code stripped the
        # wrong string here, which emptied a single-line .prj before import.)
        prj_lines = [line.rstrip() for line in prj_text.splitlines()]
        srs = osr.SpatialReference()
        srs.ImportFromESRI(prj_lines)
        srs.from_esri = True

    srs.AutoIdentifyEPSG()
    return srs


def to_epsg(srs):
    """Return the authority code of ``srs``, or a false value if none is set.

    Geographic systems are looked up under 'GEOGCS'.  Other systems are
    looked up under 'PROJCS' first and fall back to 'GEOGCS'.
    """
    if srs.IsGeographic():
        return srs.GetAuthorityCode('GEOGCS')

    code = srs.GetAuthorityCode('PROJCS')
    if code:
        return code

    try:
        return srs.GetAuthorityCode('GEOGCS')
    except Exception:
        return None


def detect_encoding(dbf_path, sample_rows=1000):
    """Guess the text encoding of the character fields of a .dbf file.

    dbf_path    -- path to the .dbf file.
    sample_rows -- maximum number of records fed to the detector.

    Returns the detected encoding name after the project-specific
    corrections below (it may be ``None`` when the detector reports no
    result).  Exceptions raised while opening or reading the file propagate
    to the caller; the file handle is closed in every case.
    """
    with open(dbf_path, 'rb') as dbf:
        db = dbfUtils.dbfreader(dbf)
        # The reader yields the field names first, then the field specs.
        fnames = next(db)
        ftypes = next(db)

        # Indexes of the string ('C') fields.
        sfields = [fno for fno in range(len(fnames)) if ftypes[fno][0] == 'C']

        detector = UniversalDetector()
        # TODO: Make this a % of total table size and stop guessing correct
        # values for guessing
        for row in itertools.islice(db, sample_rows):
            # Feed detector with concatenated string fields
            detector.feed(''.join(row[fno] for fno in sfields))
            if detector.done:
                break

    detector.close()
    encoding = detector.result["encoding"]
    confidence = detector.result["confidence"]

    if encoding == "ascii":
        encoding = "LATIN1"  # NOTE: why not UTF8 here ?
    # There's problems detecting LATIN1 encodings, it detects KOI8-R instead
    # of LATIN1
    if encoding == "KOI8-R":
        encoding = "LATIN1"
    # Fix for #1336: since ISO-8859-2 is unlikely and UniversalDetector
    # doesn't support ISO-8859-1, we'll fallback to ISO-8859-1 if confidence
    # is not high
    if encoding == "ISO-8859-2" and confidence < 0.75:
        encoding = "ISO-8859-1"
    return encoding


def main():
    """Parse the command line and print ``srid,encoding,shp_file,name``."""
    if len(sys.argv) != 3:
        print("usage: python %s shp_file name" % sys.argv[0])
        sys.exit()

    shp_file = sys.argv[1]
    name = sys.argv[2]

    # Sibling files are located by dropping the last four characters of the
    # shapefile path (i.e. a '.shp'-style extension is assumed).
    dbf_file = shp_file[0:-4] + '.dbf'
    prj_file = shp_file[0:-4] + '.prj'

    # Try detecting the SRID
    srid = None
    if os.path.isfile(prj_file):
        with open(prj_file, 'r') as handle:
            prj_string = handle.read()
        code = to_epsg(get_spatial_reference(shp_file, prj_string))
        srid = code if code else None

    # Try to detect the encoding.  This is deliberately best effort: any
    # failure is reported as the literal string "None" in the output line.
    try:
        encoding = detect_encoding(dbf_file.strip())
    except Exception:
        encoding = "None"  # NOTE: why not UTF8 here ?

    print("%s,%s,%s,%s" % (srid, encoding, shp_file, name))


if __name__ == "__main__":
    main()