Skip to content

Instantly share code, notes, and snippets.

@spmurrayzzz
Created April 18, 2013 18:24
Show Gist options
  • Save spmurrayzzz/5415046 to your computer and use it in GitHub Desktop.
Save spmurrayzzz/5415046 to your computer and use it in GitHub Desktop.
Scrape donors from HTML
#!/usr/bin/env python
from subprocess import call
from bs4 import BeautifulSoup
import json
import os
def get_html():
    """Download the WePay donation page and return its HTML as a string.

    The page is fetched with the external ``wget`` program, and the result
    is read back from a file named ``richard-family-fund`` in the current
    working directory. That file is deleted before this function returns,
    whether or not reading it succeeded.

    Returns:
        The contents of the downloaded page.

    Raises:
        IOError: if ``wget`` exits with a non-zero status, or if the
            downloaded file cannot be opened or read.
        OSError: if the ``wget`` executable cannot be launched.
    """
    filename = 'richard-family-fund'
    url = "https://www.wepay.com/donations/richard-family-fund"

    # Clear any stale copy first so the file read below is the fresh
    # download (wget normally saves to a suffixed name such as
    # "richard-family-fund.1" rather than overwrite an existing file).
    if os.path.isfile(filename):
        os.remove(filename)

    # Argument list instead of a shell string: no shell is spawned and
    # nothing in the command line is subject to shell interpretation.
    status = call(["wget", url])

    try:
        if status != 0:
            raise IOError("wget exited with status %d" % status)
        # The context manager closes the handle even if read() raises.
        with open(filename, 'r') as page:
            return page.read()
    finally:
        # Never leave the temporary download behind, even on failure.
        if os.path.isfile(filename):
            os.remove(filename)
def get_donors():
    """Collect the text of every ``<li class="donor">`` element.

    Reads the module-level ``soup`` object, which the script's entry point
    assigns before calling this function.

    Returns:
        A list with one whitespace-stripped text string per matching
        element, in the order ``findAll`` returned them.
    """
    donor_items = soup.findAll('li', {"class": "donor"})
    return [item.get_text().strip() for item in donor_items]
if __name__ == '__main__':
    # Script entry point: fetch the page, extract the donor entries and
    # save them to donors.json as a JSON array.

    # ``soup`` must remain a module-level name: get_donors() reads it as a
    # global rather than taking it as a parameter.
    soup = BeautifulSoup(get_html())
    output = get_donors()

    # The context manager flushes and closes the file deterministically
    # instead of relying on interpreter teardown to do it.
    with open('donors.json', 'w+') as f:
        f.write(json.dumps(output))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment