Skip to content

Instantly share code, notes, and snippets.

@edsu edsu/blm.py
Created Apr 1, 2019

Embed
What would you like to do?
#!/usr/bin/env python3
import re
import csv
import json
import time
from requests_html import HTMLSession
def main():
    """Scrape every protest record and write it to ``blm.csv``.

    Writes a header row followed by one CSV row per item yielded by
    ``get_items()``. The output file is created (or truncated) in the
    current working directory.
    """
    fieldnames = [
        'place', 'start', 'end', 'subject', 'participant', 'time',
        'description', 'source',
    ]
    # newline='' is required by the csv module so that it controls line
    # endings itself (otherwise blank rows appear on Windows); the explicit
    # encoding keeps non-ASCII scraped text from depending on the locale.
    # The context manager guarantees the file is flushed and closed even if
    # the scrape fails part-way through.
    with open('blm.csv', 'w', newline='', encoding='utf-8') as fh:
        output = csv.DictWriter(fh, fieldnames=fieldnames)
        output.writeheader()
        for item in get_items():
            output.writerow(item)
def get_items(first_page=1, last_page=110, delay=2):
    """Yield one scraped item dict per ``.item`` element on each result page.

    Each page is fetched by POSTing to the elephrame ``gather`` endpoint
    with the ``blm`` content type and the page number.

    Args:
        first_page: Number of the first page to request (inclusive).
        last_page: Number of the last page to request (inclusive).
        delay: Seconds to sleep before every request.

    Yields:
        The dict built by ``get_item`` for every ``.item`` element found.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    url = 'https://elephrame.com/gather'
    headers = {"Referer": "https://elephrame.com/textbook/BLM/chart"}
    # The session is closed when the generator is exhausted or closed.
    with HTMLSession() as http:
        for page in range(first_page, last_page + 1):
            payload = {"contentType": "blm", "search": "", "page": page}
            # Pause before each request to avoid hammering the site.
            time.sleep(delay)
            resp = http.post(url, headers=headers, data=payload)
            # Fail loudly instead of silently yielding nothing for an
            # error page.
            resp.raise_for_status()
            for e in resp.html.find('.item'):
                yield get_item(e)
def get_item(e):
    """Build the record dict for a single ``.item`` element.

    Every output key is filled by calling ``scrape`` on ``e`` with the CSS
    selector for that field and, where the field carries one, the label
    text that ``scrape`` should remove.
    """
    # (output key, positional arguments passed to scrape after the element)
    field_specs = (
        ("place", ('.item-protest-location',)),
        ("start", ('.protest-start',)),
        ("end", ('.protest-end',)),
        ("subject", ('.item-protest-subject', 'Subject(s)')),
        ("participant", ('.item-protest-participants', 'Participant(s)')),
        ("time", ('.item-protest-time', 'Time')),
        ("description", ('.item-protest-description', 'Description')),
        ("source", ('.item-protest-url', 'Source(s)')),
    )
    return {key: scrape(e, *args) for key, args in field_specs}
def scrape(html, q, prefix=''):
    """Return the cleaned text of the first element matching selector ``q``.

    Newlines are turned into spaces, runs of spaces are collapsed to one,
    and surrounding whitespace is trimmed. If ``prefix`` is given and the
    text starts with ``"<prefix>: "``, that leading label is removed.

    Args:
        html: An element exposing ``find(selector, first=True)``.
        q: CSS selector of the element to read.
        prefix: Optional label (without the trailing ``": "``) to strip
            from the start of the text.

    Returns:
        The cleaned text, or ``''`` when no element matches ``q``.
    """
    found = html.find(q, first=True)
    if found is None:
        # A record lacking this field should not abort the whole scrape.
        return ''
    s = re.sub(r'\n', ' ', found.text)
    s = re.sub(r' +', ' ', s).strip()
    if prefix:
        # Only strip the label when it actually leads the text. With an
        # empty prefix the old str.replace() removed every ": " in the
        # value, corrupting fields such as places and times.
        label = prefix + ': '
        if s.startswith(label):
            s = s[len(label):]
    return s
# Run the scraper only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.