Skip to content

Instantly share code, notes, and snippets.

@wrenoud
Created March 25, 2016 21:56
Show Gist options
  • Save wrenoud/91d341147114d951677e to your computer and use it in GitHub Desktop.
Save wrenoud/91d341147114d951677e to your computer and use it in GitHub Desktop.
Hsda.py
# coding: utf-8
import csv
import json
import os
import re

import requests
# hdsa
# Download the HDSA "locate resources" page once, cache it as hdsa.html,
# pull the JSON assigned to `allmarkers` out of its inline <script>, and
# write one CSV row per marker to hdsa.csv.
if not os.path.exists('hdsa.html'):
    req = requests.request('get', 'http://hdsa.org/about-hdsa/locate-resources/')
    # Fail before caching so an HTTP error page is never stored and reused
    # by every later run.
    req.raise_for_status()
    # req.content is bytes, so the cache must be opened in binary mode
    # (writing bytes to a text-mode file raises TypeError on Python 3).
    with open('hdsa.html', 'wb') as f:
        f.write(req.content)

# NOTE(review): assumes the cached page is UTF-8 -- confirm against the site.
# Undecodable bytes are replaced instead of aborting the whole run.
with open('hdsa.html', 'r', encoding='utf-8', errors='replace') as f:
    contents = f.read()

match = re.search(r'\<script\> var allmarkers = (.*?)\<\/script>', contents)
if match is None:
    raise ValueError("could not find the 'allmarkers' script block in hdsa.html")
data = json.loads(match.group(1))

# Column order of hdsa.csv; every name is a key of a marker's 'options' dict.
fields = ['hdsatitle', 'hdsaname', 'address', 'suitefloor', 'citystate', 'zip', 'cliniccoordinator', 'affiliation', 'hdsatype', 'hdsaphone', 'hdsaemail', 'website', 'hdsaid']

# newline='' lets the csv module control line endings itself.
with open('hdsa.csv', 'w', encoding='utf-8', newline='') as f:
    # Same single-quoted, comma-separated layout as before, but the csv
    # module doubles embedded quotes and stringifies non-str values
    # (e.g. the numeric 'hdsaid') instead of producing broken rows.
    writer = csv.writer(f, quotechar="'", quoting=csv.QUOTE_ALL, lineterminator='\n')
    writer.writerow(fields)
    for record in data:
        opts = record['options']
        writer.writerow([opts[name] for name in fields])
# hsa
# Download the international listing script once, cache it as hsa.html,
# convert the JavaScript literal assigned to `IHA_Listing_Items` into JSON,
# and write one CSV row per contact paragraph to hsa.csv.

# Substrings that classify a contact line as a phone/fax number or a web link.
_PHONE_MARKERS = ('Tel', 'tel', 'Fax', 'Mobil', 'Hotline', 'Cellphone')
_WEB_MARKERS = ('Website', 'www', 'Facebook', 'Twitter')


def _classify_contact(contact):
    """Sort the lines of one contact paragraph into four buckets.

    Lines are tested in this order: containing '@' -> emails, containing a
    phone marker -> numbers, containing a web marker -> websites, anything
    else -> details.  Classified lines are split on ':' / ';' (emails also
    on '/') and each piece is stripped; detail lines are kept whole.

    Returns a ``(details, emails, numbers, websites)`` tuple of lists.
    """
    details, emails, numbers, websites = [], [], [], []
    for line in contact.strip().split('\n'):
        if '@' in line:
            emails += [s.strip() for s in re.split(r':|;|/', line)]
        elif any(marker in line for marker in _PHONE_MARKERS):
            numbers += [s.strip() for s in re.split(r':|;', line)]
        elif any(marker in line for marker in _WEB_MARKERS):
            # Do not split on the ':' of a URL scheme, otherwise
            # "http://example.org" is torn into "http" and "//example.org".
            websites += [s.strip() for s in re.split(r':(?!//)|;', line)]
        else:
            details.append(line)
    return details, emails, numbers, websites


if not os.path.exists('hsa.html'):
    req = requests.request('get', 'http://hda.org.uk/international.js')
    # Fail before caching so an HTTP error page is never stored and reused.
    req.raise_for_status()
    # req.content is bytes, so the cache must be opened in binary mode
    # (writing bytes to a text-mode file raises TypeError on Python 3).
    with open('hsa.html', 'wb') as f:
        f.write(req.content)

# NOTE(review): assumes the cached script is UTF-8 -- confirm against the site.
# Undecodable bytes are replaced instead of aborting the whole run.
with open('hsa.html', 'r', encoding='utf-8', errors='replace') as f:
    contents = f.read()

match = re.search(r'IHA_Listing_Items = ([\S\s]*?);\n', contents)
if match is None:
    raise ValueError("could not find 'IHA_Listing_Items' in hsa.html")

# Turn the JavaScript literal into JSON: single -> double quotes, drop the
# trailing comma before a closing brace, and unescape '\&'.
listing_json = match.group(1).replace("'", '"').replace(',\n }', '\n }').replace(r'\&', '&')
data = json.loads(listing_json)

# newline='' lets the csv module control line endings itself.
with open('hsa.csv', 'w', encoding='utf-8', newline='') as f:
    # Same single-quoted, comma-separated layout as before, but embedded
    # quotes are doubled by the csv module instead of corrupting the row.
    writer = csv.writer(f, quotechar="'", quoting=csv.QUOTE_ALL, lineterminator='\n')
    writer.writerow(['region', 'info', 'email', 'phone', 'website'])
    for record in data:
        region = record['Region']
        # Contacts within a region are separated by blank lines.
        for contact in record['DetailsPlain'].split('\n\n'):
            details, emails, numbers, websites = _classify_contact(contact)
            writer.writerow((region, ';'.join(details), ';'.join(emails), ';'.join(numbers), ';'.join(websites)))
print('done')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment