Skip to content

Instantly share code, notes, and snippets.

@jatorre
Created April 24, 2011 19:53
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save jatorre/939830 to your computer and use it in GitHub Desktop.
Save jatorre/939830 to your computer and use it in GitHub Desktop.
A Python script that detects the encoding and SRID of a shapefile, making its best guess
from chardet.universaldetector import UniversalDetector
import os.path
import sys
import dbfUtils
import sys
from osgeo import osr
from urllib import urlencode
from urllib2 import urlopen
import json
# Usage: python <this script> path/to/file.shp
#
# Guesses the SRID and the attribute-table text encoding of a shapefile and
# prints a shp2pgsql command line that uses both guesses.
shp_file = sys.argv[1]
# The sidecar files share the shapefile's base name. splitext strips whatever
# extension was given instead of blindly chopping the last four characters.
base_path = os.path.splitext(shp_file)[0]
dbf_file = base_path + '.dbf'
prj_file = base_path + '.prj'

# Try detecting the SRID; by default we use 4326 and hope for the best.
srid = 4326
if os.path.isfile(prj_file):
    with open(prj_file, 'r') as prj_handle:
        prj_txt = prj_handle.read()
    srs = osr.SpatialReference()
    srs.ImportFromESRI([prj_txt])
    srs.AutoIdentifyEPSG()
    code = srs.GetAuthorityCode(None)
    if code:
        srid = code
    else:
        # OK, no luck locally; let's try the OpenGeo web service.
        query = urlencode({
            'exact': True,
            'error': True,
            'mode': 'wkt',
            'terms': prj_txt})
        # Passing a data argument makes urlopen send the query as a POST body.
        webres = urlopen('http://prj2epsg.org/search.json', query)
        try:
            jres = json.loads(webres.read())
        finally:
            webres.close()
        # Keep the default SRID when the reply lists no candidate codes.
        if jres.get('codes'):
            srid = int(jres['codes'][0]['code'])

# Try to detect the encoding from the attribute table.
detector = UniversalDetector()
with open(dbf_file, 'rb') as dbf:
    for row in dbfUtils.dbfreader(dbf):
        # Feed the raw field bytes, not str(row): the repr of a row escapes
        # every non-ASCII byte as "\xNN", so the detector would only ever see
        # plain ASCII and could never recognise another encoding.
        # NOTE(review): assumes dbfreader yields each row as a sequence whose
        # text fields are byte strings -- confirm against dbfUtils.
        for value in row:
            if isinstance(value, bytes):
                detector.feed(value)
        if detector.done:
            break
detector.close()

encoding = detector.result["encoding"]
# Pure-ASCII data (or no verdict at all) tells us nothing about the real
# encoding, so fall back to LATIN1 instead of emitting "ascii" or "None".
if encoding is None or encoding == "ascii":
    encoding = "LATIN1"

# shp_file already carries its extension, so it is used as given; appending
# ".shp" again would produce a non-existent "name.shp.shp" path.
print("shp2pgsql -s %s -k -i -I -W %s %s public.importing_table" % (srid, encoding, shp_file))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment