Skip to content

Instantly share code, notes, and snippets.

@step21
Last active October 12, 2017 02:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save step21/cc7fe9829fa89c4077dd1075d430945c to your computer and use it in GitHub Desktop.
Save step21/cc7fe9829fa89c4077dd1075d430945c to your computer and use it in GitHub Desktop.
# encoding=utf-8
import epidat_parse_bfs
from loc import loc
from poerelief import db, models
import untangle
#import db_access
#import pymongo as PyMongo
# Prefix of a single-record URL; Harvest.initrecurls appends the record id,
# the separator `s` and `format` to it.
baseurl = "http://steinheim-institut.de/cgi-bin/epidat?id="
# Prefix of a per-location changelog URL (the list of records);
# Harvest.initlocurls appends an entry of `loc` and then `selrecords`.
rbaseurl = "http://www.steinheim-institut.de/cgi-bin/epidat?sel="
# Query-string tail of the changelog URL.
# NOTE(review): changesSince=20061201 looks like a YYYYMMDD cut-off date - confirm.
selrecords = "&format=x&function=changelog&changesSince=20061201"
# The separator placed between the record id and the format in a record URL.
s = "-"
# Format suffix appended to every record URL.
# NOTE: this module-level name shadows the builtin `format` inside this module.
format = "teip5"
class Harvest(object):
    """Collect the URLs to harvest: per-location changelogs, then records.

    Typical use is ``initlocurls()`` followed by ``initrecurls(...)`` with
    the list it returned.
    """

    def __init__(self):
        # Changelog URLs, one per entry of ``loc``; filled by initlocurls().
        self.loclist = []
        # Record URLs gathered from the changelogs; filled by initrecurls().
        self.recurls = []

    def initlocurls(self):
        """Rebuild and return the changelog URL list.

        One URL of the form ``rbaseurl + <entry> + selrecords`` is produced
        for every entry of ``loc``, in order.

        Returns:
            ``self.loclist`` (the same list object on every call).
        """
        # Replace the contents in place instead of appending: the input is
        # the fixed ``loc`` sequence, so appending on a repeated call could
        # only duplicate every URL. Slice assignment keeps the list identity
        # for anyone already holding a reference to ``self.loclist``.
        self.loclist[:] = [rbaseurl + place + selrecords for place in loc]
        return self.loclist

    def initrecurls(self, loclist):
        """Read each changelog in *loclist* and collect its record URLs.

        Every entry of *loclist* is handed to ``untangle.parse``. When the
        ``size`` attribute of the document's ``xml.changes`` element is
        greater than zero, one URL of the form
        ``baseurl + <id text> + s + format`` is added per ``id`` child.

        URLs are appended to ``self.recurls``, so results accumulate across
        calls.

        Args:
            loclist: iterable of changelog sources accepted by
                ``untangle.parse`` (e.g. the list from ``initlocurls``).

        Returns:
            ``self.recurls``.
        """
        for url in loclist:
            changes = untangle.parse(url).xml.changes
            # Only touch ``changes.id`` when the changelog reports entries.
            if int(changes['size']) <= 0:
                continue
            self.recurls.extend(
                baseurl + entry.cdata + s + format for entry in changes.id
            )
        return self.recurls
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment