Last active
September 25, 2015 16:10
-
-
Save co89757/a208e6af1f806118b9a9 to your computer and use it in GitHub Desktop.
Scrape projection definitions (OGC WKT, ESRI WKT, PROJ.4) for EPSG codes from the spatialreference.org lookup website
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
```python | |
#!/usr/bin/python | |
import requests as rq | |
import bs4 | |
import logging | |
# Send log records to epsgCrawl.log.  The threshold is WARNING, so the
# logging.debug() calls made while crawling are not written unless the
# level is lowered here.
logging.basicConfig(
    filename="epsgCrawl.log",
    level=logging.WARNING,
    format="%(levelname)s::%(message)s",
)
class EPSGLookup(object):
    """Download and cache projection definitions for a set of EPSG codes.

    Definitions are requested from ``baseURL`` in three formats (OGC WKT,
    ESRI WKT and PROJ.4).  Each format has its own dict keyed by EPSG code:
    ``mapWKT``, ``mapESRI`` and ``mapProj4``.  A value of ``None`` means no
    definition has been stored for that code.

    Stored definitions are the raw response bodies (``response.content``).
    Exceptions raised by ``requests`` (connection errors, timeouts) are not
    caught and propagate to the caller.
    """

    # Seconds to wait on a request; without a timeout a stalled connection
    # would block the crawl indefinitely.
    REQUEST_TIMEOUT = 30

    def __init__(self, codes):
        """Prepare empty caches for ``codes``.

        Args:
            codes: iterable of EPSG codes to crawl.  It is copied into a
                list so that one-shot iterators (e.g. generators) can be
                traversed by every cache and every crawl.
        """
        super(EPSGLookup, self).__init__()
        codes = list(codes)
        self.EPSGCodes = codes
        self.baseURL = "http://spatialreference.org/ref/epsg/"
        self.mapWKT = dict.fromkeys(codes)
        self.mapProj4 = dict.fromkeys(codes)
        self.mapESRI = dict.fromkeys(codes)

    def LookupWKT(self, epsg):
        """Return the OGC WKT for ``epsg``, fetching it on a cache miss.

        Returns ``None`` when the server does not answer with HTTP 200.
        """
        # The format segment is "ogcwkt", the same one CrawlWKT requests.
        return self._lookup(self.mapWKT, epsg, "ogcwkt")

    def LookupESRI(self, epsg):
        """Return the ESRI WKT for ``epsg``, fetching it on a cache miss.

        Returns ``None`` when the server does not answer with HTTP 200.
        """
        return self._lookup(self.mapESRI, epsg, "esriwkt")

    def LookupProj4(self, epsg):
        """Return the PROJ.4 string for ``epsg``, fetching it on a cache miss.

        Returns ``None`` when the server does not answer with HTTP 200.
        """
        return self._lookup(self.mapProj4, epsg, "proj4")

    def CrawlAll(self):
        """Fill all three caches for every code in ``EPSGCodes``."""
        self.CrawlESRI()
        self.CrawlProj4()
        self.CrawlWKT()

    def CrawlProj4(self):
        """Fetch the PROJ.4 definition of every code into ``mapProj4``."""
        self._crawl(self.mapProj4, "proj4", "EPSG:%d Proj4: %s")

    def CrawlWKT(self):
        """Fetch the OGC WKT definition of every code into ``mapWKT``."""
        self._crawl(self.mapWKT, "ogcwkt", "EPSG: %d, WKT: %s")

    def CrawlESRI(self):
        """Fetch the ESRI WKT definition of every code into ``mapESRI``."""
        self._crawl(self.mapESRI, "esriwkt", "EPSG: %d, ESRI_WKT: %s")

    def _build_url(self, epsg, fmt):
        """Return the URL serving ``epsg`` in format ``fmt``."""
        return self.__catStr(self.baseURL, str(epsg), "/", fmt, "/")

    def _fetch(self, url):
        """GET ``url`` and return the response body, or None unless HTTP 200."""
        response = rq.get(url, timeout=self.REQUEST_TIMEOUT)
        if response.status_code == 200:
            return response.content
        return None

    def _lookup(self, cache, epsg, fmt):
        """Return ``cache[epsg]`` if set, else fetch it, store it and return it."""
        cached = cache.get(epsg)
        if cached:
            return cached
        definition = self._fetch(self._build_url(epsg, fmt))
        if definition is not None:
            cache[epsg] = definition
        return definition

    def _crawl(self, cache, fmt, log_format):
        """Fetch format ``fmt`` for every code and store successes in ``cache``.

        Codes whose request does not return HTTP 200 keep their current
        cache value.  ``log_format`` is the debug message template; it
        receives the EPSG code and the fetched definition.
        """
        for epsg in self.EPSGCodes:
            url = self._build_url(epsg, fmt)
            logging.debug("URL: %s", url)
            definition = self._fetch(url)
            if definition is not None:
                logging.debug(log_format, epsg, definition)
                cache[epsg] = definition

    def __catStr(self, *strings):
        """Concatenate ``strings`` in order."""
        return ''.join(strings)
def mainTest():
    """Crawl EPSG codes 4500-4699 and write the results to epsg_table.csv.

    One row is written per code for which a PROJ.4 definition is available,
    with columns EPSG, OGCWKT, ESRIWKT and PROJ4.  Runs on Python 2 and 3.
    """
    import csv
    import sys

    def as_text(value):
        # The crawler stores response.content, which is bytes on Python 3;
        # csv would otherwise write the repr (b'...') instead of the text.
        # On Python 2 bytes is str, so values pass through unchanged.
        if isinstance(value, bytes) and not isinstance(value, str):
            return value.decode("utf-8", "replace")
        return value

    codes = range(4500, 4700)
    crawler = EPSGLookup(codes)
    crawler.CrawlAll()

    ## file csv
    csvname = "epsg_table.csv"
    # The csv module expects a binary file on Python 2 and a text file
    # opened with newline='' on Python 3.
    if sys.version_info[0] >= 3:
        csvfile = open(csvname, 'w', newline='', encoding="utf-8")
    else:
        csvfile = open(csvname, 'wb')
    with csvfile:
        writer = csv.writer(csvfile, doublequote=False, escapechar="\\")
        writer.writerow(["EPSG", "OGCWKT", "ESRIWKT", "PROJ4"])
        for code in codes:
            # Skip codes with no PROJ.4 definition (treated as invalid).
            if crawler.LookupProj4(code):
                writer.writerow([
                    str(code),
                    as_text(crawler.mapWKT[code]),
                    as_text(crawler.mapESRI[code]),
                    as_text(crawler.mapProj4[code]),
                ])
    print('======DONE=======')
# Run the crawl only when this file is executed as a script, not on import.
if __name__ == '__main__':
    mainTest()
``` |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment