Last active
September 17, 2019 18:48
-
-
Save mkfink/7f971012550087fa456a33dc78ead073 to your computer and use it in GitHub Desktop.
scrape patronicity campaign for new donations and post them to slack and mqtt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Scrapes a Patronicity fundraiser page for the list of donations.
# Posts a notification in Slack for new donations
# and publishes them to an MQTT-enabled display (https://www.i3detroit.org/wiki/16_segment_mqtt_display).
# Requires thermometer.py from https://gist.github.com/mkfink/9940ed027861b8252f6e485ee4fd8eb5
from bs4 import BeautifulSoup | |
from selenium.webdriver.chrome.options import Options | |
from selenium import webdriver | |
from time import sleep | |
import re | |
from slack import WebClient | |
from subprocess import call | |
from datetime import datetime | |
from slack_api_key import SLACK_BOT_TOKEN | |
from thermometer import make_therm | |
# Log file that the polling loop appends "<timestamp>\t<total>\t<donor count>" lines to.
total_log = 'patronicity_total.log'

# Campaign page that gets scraped for donors and the running total.
patronicity_url = 'https://patronicity.com/project/i3detroit_expansion_1#!/'

# Path of the chromedriver binary handed to selenium.
chrome_driver = '/home/mkfink/.local/bin/chromedriver'
def get_donors():
    """Load the Patronicity campaign page in headless Chrome and return its HTML.

    Opens ``patronicity_url``, clicks the first element whose text contains
    'Donors', then keeps clicking 'Load More...' for as long as that button is
    displayed, so the returned page source contains the expanded donor list.

    Returns:
        str: ``driver.page_source`` after the donor list has been expanded.

    Raises:
        IndexError: if no element containing the text 'Donors' is found.
        Exception: whatever selenium raises while driving the browser.

    The browser is always shut down, even when an error is raised, so that
    repeated failed polls do not leave Chrome processes behind.
    """
    op = Options()
    for flag in (
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--disable-gpu",
        "--disable-extensions",
        "disable-infobars",
        "start-maximized",
        "--headless",
    ):
        op.add_argument(flag)

    load_more_xpath = "//*[contains(text(), 'Load More...')]"

    driver = webdriver.Chrome(chrome_driver, options=op)
    try:
        driver.get(patronicity_url)

        # Open the list of donors
        buttons = driver.find_elements_by_xpath("//*[contains(text(), 'Donors')]")
        buttons[0].click()
        sleep(4)  # give the page time to render the donor list

        # Only the 20 most recent donors get loaded. Find the button to load
        # more and click it until it goes away.
        # NOTE(review): the loop is only entered when MORE than one element
        # matches; kept as in the original -- confirm against the live page
        # whether "> 0" was intended.
        more_to_load = len(driver.find_elements_by_xpath(load_more_xpath)) > 1
        while more_to_load:
            button = driver.find_elements_by_xpath(load_more_xpath)
            # The button may be hidden or removed from the DOM entirely once
            # everything is loaded; treat both as "done".
            if button and button[0].is_displayed():
                button[0].click()
                sleep(4)
            else:
                more_to_load = False

        return driver.page_source
    finally:
        # Always release the browser, including on errors.
        driver.quit()
def slack_output(contributions, total):
    """Announce every contribution in the #grants_fundraising Slack channel.

    contributions -- sequence of entries where index 0 is the donor name and
                     index 1 is the donated amount (both used as text)
    total         -- total raised so far, inserted into the message as given
    """
    client = WebClient(SLACK_BOT_TOKEN)
    for entry in contributions:
        donor, dollars = entry[0], entry[1]
        client.chat_postMessage(
            channel='#grants_fundraising',
            text=f'New Patronicity donation of {dollars} from {donor}! Total raised is {total}',
        )
def sign_output(contributions, total):
    """Publish the most recent contributions to the 16 segment LED sign.

    For each contribution one message is published with ``mosquitto_pub`` to
    the topic ``cmnd/i3/inside/commons/16seg/display``.

    Args:
        contributions: sequence of entries where index 0 is the donor name and
            index 1 is the donated amount as text (e.g. ``'$1,000'``).
        total: total raised as text; '$' and ',' are stripped before display.

    Returns:
        None

    If there were multiple contributions detected, each message replaces the
    previous one on the sign, so only the last one published stays visible.
    """
    topic = 'cmnd/i3/inside/commons/16seg/display'
    # The total does not depend on the contribution, so clean it once.
    total = total.replace('$', '').replace(',', '').upper()

    for c in contributions:
        dollars = c[1].replace('$', '').replace(',', '')
        donor = c[0].upper()
        led_message = f'NEW DONATION OF {dollars} FROM {donor} -- TOTAL RAISED IS {total} USD -- '
        # Donor names come from a web page: pass the message as a separate
        # argv entry instead of building a shell string, so quotes, '$' or
        # backticks in a name can neither break nor inject into the command.
        try:
            call(['mosquitto_pub', '-t', topic, '-m', led_message])
        except OSError as err:
            # A missing/unrunnable mosquitto_pub was non-fatal when run
            # through the shell; keep it non-fatal here.
            print(f'Could not run mosquitto_pub: {err}')
def handle_error(num_errors):
    """Record a failed attempt to reach Patronicity and wait before the retry.

    Appends a timestamped line to connection_errors.log on every call.
    Below 120 errors it prints a retry notice and sleeps for 60 seconds.
    From 120 errors on it posts a notice to the #grants_fundraising Slack
    channel, prints a final message and exits the program.

    num_errors -- number of consecutive failures so far
    """
    count = str(num_errors)
    with open('connection_errors.log', 'a+') as log_file:
        now = datetime.now()
        log_file.write(f'{now} Error connecting to patronicity.\n')

    if num_errors < 120:
        print(f'{now} Error connecting to patronicity. Trying again...')
        sleep(60)
        return

    # Too many failures in a row: tell the channel and give up.
    slack_client = WebClient(SLACK_BOT_TOKEN)
    slack_client.chat_postMessage(
        channel='#grants_fundraising',
        text=f'Patronicity has been unavailable for {count} minutes.',
    )
    print(f'Too many connection errors ({count}), exiting.')
    exit()
# State carried between polls: the previous and the latest accepted list of
# (donor, amount) pairs, their sizes, and the count of consecutive failures.
contributions_old = []
contributions_new = []
num_errors = 0
num_donors_old = 0
num_donors_new = 0

while True:
    # Pull up the donations page and throw it into BeautifulSoup
    page_html = None
    page_soup = None
    try:
        page_html = get_donors()
        page_soup = BeautifulSoup(page_html, 'lxml')
    except Exception:
        # Not a bare except: Ctrl-C / SystemExit must still stop the script.
        num_errors += 1
        handle_error(num_errors)
        continue

    # Failed requests return this for valid urls that don't exist and should return 404
    # Probably something to do with selenium that I don't have time to learn now, so
    # this is a hacky way of handling that without fixing the underlying issue
    if '<html xmlns="http://www.w3.org/1999/xhtml"><head></head><body></body></html>' in page_html:
        num_errors += 1
        print('returned blank page')
        handle_error(num_errors)
        continue

    # We got this far, reset the error count
    num_errors = 0

    # Get total amount contributed. Both a missing stats block and a stats
    # block without a dollar amount count as "total not found".
    stats_div = page_soup.find("div", {"id": "project-stats"})
    total_match = None
    if stats_div is not None:
        total_match = re.search(r'(\$\d{1,3}(,\d{3})*)', stats_div.text)
    if total_match is None:
        print('Could not find donation total on page.')
        sleep(60)  # don't hammer the site while it is misbehaving
        continue
    donation_total = total_match.group(1)

    # The list of donors as html
    donors_html = page_soup.find_all("div", {"class": "donor row"})

    # Extract a list of donor names; rows without a name link are anonymous
    donors_list_html = [x.find('a', {'class': 'ng-binding ng-scope'}) for x in donors_html]
    donors = [x.text if x is not None else 'Anonymous' for x in donors_list_html]

    # Extract a list of donated amounts; rows without an amount are hidden
    amt_html_list = [x.find('div', {'class': 'lead amount ng-binding ng-scope'}) for x in donors_html]
    amts = [x.text.strip() if x is not None else 'Hidden' for x in amt_html_list]

    # For cases when part or all of the donors list does not load: compare
    # against the most recently accepted count. Accepting a short list would
    # make the next complete load look like it contains new donations.
    if len(donors) < num_donors_new:
        print('Failed to load all donations. Trying again...')
        sleep(60)  # don't hammer the site while it is misbehaving
        continue

    # These lists should be the same length. If not, something broke.
    if len(donors) != len(amts):
        print('Different numbers of donors and amounts. Something\'s broken?')
        raise SystemExit(1)

    # Keep last copy of the list to compare against
    contributions_old = contributions_new
    contributions_new = list(zip(donors, amts))
    num_donors_old = num_donors_new
    num_donors_new = len(donors)

    now = datetime.now()

    # If there are new donations and we didn't just start the program, post them
    num_new = len(contributions_new) - len(contributions_old)
    if contributions_old and num_new > 0:
        print(f'{now} {num_new} new contributions found')
        # The new entries are taken from the head of the list
        # (assumes the page lists the newest donors first -- TODO confirm).
        just_new = contributions_new[:num_new]
        print(just_new)
        slack_output(just_new, donation_total)
        sign_output(just_new, donation_total)
    else:
        print(f'{now} No new contributions. Total is {donation_total} from {len(donors)} donors.')

    # NOTE(review): indentation was lost in this copy of the file; logging and
    # the thermometer redraw are assumed to run on every successful poll.
    with open(total_log, 'a+') as f:
        f.write(f'{now}\t{donation_total}\t{len(donors)}\n')
    make_therm('thermometer.svg', int(donation_total.replace('$', '').replace(',', '')), 50000)

    # Don't spam the webserver
    sleep(60)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment