Skip to content

Instantly share code, notes, and snippets.

@kagesenshi
Last active October 5, 2015 08:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kagesenshi/244113631a7640c596d1 to your computer and use it in GitHub Desktop.
Save kagesenshi/244113631a7640c596d1 to your computer and use it in GitHub Desktop.
import urllib
import json
import re
from dateutil.parser import parse as parse_date
from datetime import datetime
# Page whose inline script contains the marker definitions scraped below.
APIMS_URL = "http://apims.doe.gov.my/v2/"

# Expected layout of a marker's popup HTML: a one-row table with two cells,
# each holding an <h2> heading followed by free text.  The four groups are,
# in order: first heading, first text, second heading, second text.
POPUP_PATTERN = re.compile(
    r'<table><tr><td style=".*?"><h2>(.*?)</h2>(.*?)</td><td style=".*?"><h2>(.*?)</h2>(.*?)</td></tr></table>'
)


def fetch_page(url=APIMS_URL):
    """Download *url* and return its body as text.

    Works on both Python 2 and Python 3 and always closes the connection.
    """
    from contextlib import closing
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2

    # closing() is used because the Python 2 response object is not a
    # context manager on its own.
    with closing(urlopen(url)) as response:
        raw = response.read()
    # NOTE(review): the page encoding is assumed to be UTF-8 -- confirm.
    # Undecodable bytes are replaced rather than aborting the whole run.
    return raw.decode('utf-8', 'replace')


def extract_marker_lines(html):
    """Return the marker object literals found in *html* as JSON text.

    A line qualifies when it mentions ``latLng`` and, once stripped, starts
    with ``{``.  Each qualifying line is rewritten from a JavaScript object
    literal into JSON: trailing commas are dropped, single quotes become
    double quotes, and the known keys are quoted.

    Args:
        html: Full page source as one string.

    Returns:
        A list of strings, one per marker, in page order.
    """
    marker_lines = []
    for line in html.split("\n"):
        if "latLng" not in line:
            continue
        line = line.strip()
        if not line.startswith("{"):
            continue
        # Drop the comma separating this literal from the next array item,
        # then the dangling comma before the closing brace; JSON allows
        # neither.
        if line.endswith(','):
            line = line[:-1]
        if line.endswith(',}'):
            line = line[:-2] + '}'
        line = line.replace("'", '"')
        # JSON requires quoted keys; these are the bare keys the script
        # knows about.
        for key in ("latLng", "data", "options", "icon"):
            line = line.replace(key + ":", '"%s":' % key)
        marker_lines.append(line)
    return marker_lines


def parse_marker(marker_json):
    """Turn one marker's JSON text into an output record.

    Args:
        marker_json: A string as produced by ``extract_marker_lines``.

    Returns:
        A dict with the keys ``latLng``, ``api``, ``level``, ``location``
        and ``timestamp`` (ISO 8601 text), or ``None`` when the text is not
        valid JSON, lacks ``data``/``latLng``, has popup HTML that does not
        match ``POPUP_PATTERN``, or carries a non-numeric reading or an
        unparseable timestamp.
    """
    try:
        marker = json.loads(marker_json)
        lat_lng = marker['latLng']
        match = POPUP_PATTERN.match(marker['data'])
    except (ValueError, KeyError, TypeError):
        return None
    # A popup with a different layout must not abort the whole run.
    if match is None:
        return None

    api, level, location, timestamp = match.groups()
    try:
        # The reading may carry a "*" marker that int() cannot digest.
        api_value = int(api.replace("*", ''))
        iso_timestamp = parse_date(timestamp).isoformat()
    except (ValueError, OverflowError):
        return None

    return {
        'latLng': lat_lng,
        'api': api_value,
        'level': level,
        'location': location,
        'timestamp': iso_timestamp,
    }


def main():
    """Scrape the page and write one JSON record per line to a new file.

    The file is created in the current directory and named after the current
    local time (``YYYYMMDDTHHMM.jsonl``).  Markers that cannot be parsed are
    skipped and counted on stderr.
    """
    import sys

    records = []
    skipped = 0
    for marker_json in extract_marker_lines(fetch_page()):
        record = parse_marker(marker_json)
        if record is None:
            skipped += 1
            continue
        records.append(json.dumps(record))

    filename = datetime.now().strftime("%Y%m%dT%H%M.jsonl")
    with open(filename, "w") as out:
        out.writelines(line + '\n' for line in records)

    if skipped:
        sys.stderr.write("Skipped %d unparseable marker(s)\n" % skipped)
    print("Written: %s" % filename)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment