Skip to content

Instantly share code, notes, and snippets.

@co89757
Last active September 25, 2015 16:10
Show Gist options
  • Save co89757/a208e6af1f806118b9a9 to your computer and use it in GitHub Desktop.
Save co89757/a208e6af1f806118b9a9 to your computer and use it in GitHub Desktop.
Scrape projection definitions (OGC WKT, ESRI WKT and PROJ.4) for EPSG codes from the spatialreference.org lookup website.
```python
#!/usr/bin/python
import requests as rq
import bs4
import logging
# Send log records to epsgCrawl.log, one "LEVEL::message" line per record.
# The WARNING threshold means the logging.debug() calls made while crawling
# are discarded; lower the level to logging.DEBUG to see the requested URLs
# and the fetched definitions.
logging.basicConfig(filename="epsgCrawl.log",level=logging.WARNING, format="%(levelname)s::%(message)s")
class EPSGLookup(object):
    """Fetch and cache projection definitions for EPSG codes.

    Definitions are downloaded from spatialreference.org in three formats
    (OGC WKT, ESRI WKT and PROJ.4) and cached per code in ``mapWKT``,
    ``mapESRI`` and ``mapProj4``.  A cached value of ``None`` means the
    definition has not been fetched successfully (yet).
    """

    # Seconds to wait for the server before giving up on a request; without
    # a timeout a single stalled connection would hang the whole crawl.
    REQUEST_TIMEOUT = 30

    # Path segments the site uses for each output format.
    _FORMAT_WKT = "ogcwkt"
    _FORMAT_ESRI = "esriwkt"
    _FORMAT_PROJ4 = "proj4"

    def __init__(self, codes, fetch=None):
        """Create a lookup for *codes*.

        Args:
            codes: iterable of EPSG codes to crawl.  It is copied into a
                list, so one-shot iterables (generators) are supported.
            fetch: optional callable ``fetch(url)`` returning the response
                body, or ``None`` when the definition is unavailable.
                Defaults to an HTTP GET through ``requests`` that treats
                every status other than 200 as unavailable.
        """
        super(EPSGLookup, self).__init__()
        # Materialise once: a one-shot iterable would otherwise be exhausted
        # by the first ``dict.fromkeys`` call below.
        self.EPSGCodes = list(codes)
        self.baseURL = "http://spatialreference.org/ref/epsg/"
        self.mapWKT = dict.fromkeys(self.EPSGCodes)
        self.mapProj4 = dict.fromkeys(self.EPSGCodes)
        self.mapESRI = dict.fromkeys(self.EPSGCodes)
        self._fetch = fetch if fetch is not None else self._httpGet

    def LookupWKT(self, epsg):
        """Return the OGC WKT for *epsg* (cached or fetched), or ``None``."""
        return self._lookup(self.mapWKT, epsg, self._FORMAT_WKT)

    def LookupESRI(self, epsg):
        """Return the ESRI WKT for *epsg* (cached or fetched), or ``None``."""
        return self._lookup(self.mapESRI, epsg, self._FORMAT_ESRI)

    def LookupProj4(self, epsg):
        """Return the PROJ.4 string for *epsg* (cached or fetched), or ``None``."""
        return self._lookup(self.mapProj4, epsg, self._FORMAT_PROJ4)

    def CrawlAll(self):
        """Fetch every format for every code in ``EPSGCodes``."""
        self.CrawlESRI()
        self.CrawlProj4()
        self.CrawlWKT()

    def CrawlProj4(self):
        """Fetch the PROJ.4 definition of every code into ``mapProj4``."""
        self._crawl(self.mapProj4, self._FORMAT_PROJ4, "EPSG:%d Proj4: %s")

    def CrawlWKT(self):
        """Fetch the OGC WKT definition of every code into ``mapWKT``."""
        self._crawl(self.mapWKT, self._FORMAT_WKT, "EPSG: %d, WKT: %s")

    def CrawlESRI(self):
        """Fetch the ESRI WKT definition of every code into ``mapESRI``."""
        self._crawl(self.mapESRI, self._FORMAT_ESRI, "EPSG: %d, ESRI_WKT: %s")

    def _lookup(self, cache, epsg, fmt):
        """Return ``cache[epsg]`` if set, otherwise fetch and cache it.

        Failed fetches are not cached, so a later call retries the request.
        """
        cached = cache.get(epsg)
        if cached:
            return cached
        body = self._fetch(self._buildURL(epsg, fmt))
        if body is not None:
            cache[epsg] = body
        return body

    def _crawl(self, cache, fmt, logFormat):
        """Fetch format *fmt* for every code, storing successes in *cache*.

        Codes whose fetch fails keep whatever value *cache* already holds.
        """
        for epsg in self.EPSGCodes:
            url = self._buildURL(epsg, fmt)
            logging.debug("URL: %s", url)
            body = self._fetch(url)
            if body is not None:
                logging.debug(logFormat, epsg, body)
                cache[epsg] = body

    def _buildURL(self, epsg, fmt):
        """Return the URL serving *epsg* in format *fmt*.

        Lookups and crawls share this builder so both hit the same endpoint.
        """
        return self.__catStr(self.baseURL, str(epsg), "/", fmt, "/")

    def _httpGet(self, url):
        """Default fetcher: GET *url*; return the body on HTTP 200, else ``None``.

        Exceptions raised by ``requests`` (connection errors, timeouts) are
        not caught here and propagate to the caller.
        """
        response = rq.get(url, timeout=self.REQUEST_TIMEOUT)
        if response.status_code == 200:
            return response.content
        return None

    def __catStr(self, *strings):
        """Concatenate *strings* into one string."""
        return ''.join(strings)
def mainTest():
    """Crawl EPSG codes 4500-4699 and dump the results to ``epsg_table.csv``.

    The CSV has the columns EPSG, OGCWKT, ESRIWKT and PROJ4; codes for which
    no PROJ.4 definition could be obtained are skipped.  The function runs
    on both Python 2 and Python 3.
    """
    import csv
    import sys

    # ``range`` works on both Python 2 and 3 (``xrange`` is Python 2 only).
    codes = range(4500, 4700)
    crawler = EPSGLookup(codes)
    crawler.CrawlAll()

    def asText(value):
        # The crawler stores raw response bodies.  On Python 3 those are
        # ``bytes`` and csv would write their repr (b'...'), so decode them;
        # on Python 2 ``bytes`` is ``str`` and the value passes through.
        if isinstance(value, bytes) and not isinstance(value, str):
            return value.decode("utf-8", "replace")
        return value

    ## file csv
    csvname = "epsg_table.csv"
    # The csv module wants a binary file on Python 2 but a text file opened
    # with newline='' on Python 3.
    if sys.version_info[0] >= 3:
        csvfile = open(csvname, "w", newline="", encoding="utf-8")
    else:
        csvfile = open(csvname, "wb")
    with csvfile:
        writer = csv.writer(csvfile, doublequote=False, escapechar="\\")
        writer.writerow(["EPSG", "OGCWKT", "ESRIWKT", "PROJ4"])
        for code in codes:
            # LookupProj4 returns the crawled value when there is one and
            # otherwise retries the download once more.
            if crawler.LookupProj4(code):
                ### skip non valid code
                writer.writerow([
                    str(code),
                    asText(crawler.mapWKT[code]),
                    asText(crawler.mapESRI[code]),
                    asText(crawler.mapProj4[code]),
                ])
    # Parenthesised form prints the same text on Python 2 and Python 3.
    print('======DONE=======')
# Run the sample crawl only when executed as a script, not when imported.
if __name__ == "__main__":
    mainTest()
```
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment