Skip to content

Instantly share code, notes, and snippets.

@mkfink
Last active September 17, 2019 18:48
Show Gist options
  • Save mkfink/7f971012550087fa456a33dc78ead073 to your computer and use it in GitHub Desktop.
Save mkfink/7f971012550087fa456a33dc78ead073 to your computer and use it in GitHub Desktop.
scrape patronicity campaign for new donations and post them to slack and mqtt
# Scrapes a Patronicity fundraiser page for the list of donations
# Posts a notification in slack for new donations
# and publishes them to an mqtt-enabled display (https://www.i3detroit.org/wiki/16_segment_mqtt_display)
# Requires thermometer.py from https://gist.github.com/mkfink/9940ed027861b8252f6e485ee4fd8eb5
from bs4 import BeautifulSoup
from selenium.webdriver.chrome.options import Options
from selenium import webdriver
from time import sleep
import re
from slack import WebClient
from subprocess import call
from datetime import datetime
from slack_api_key import SLACK_BOT_TOKEN
from thermometer import make_therm
# File the polling loop appends a "<timestamp>\t<total>\t<donor count>" line to on every successful poll.
total_log = "patronicity_total.log"
# Campaign page that get_donors() loads to read the donor list and running total.
patronicity_url = "https://patronicity.com/project/i3detroit_expansion_1#!/"
# Path to the chromedriver executable passed to selenium's webdriver.Chrome.
chrome_driver = '/home/mkfink/.local/bin/chromedriver'
def get_donors():
    ''' Gets the rendered HTML of the patronicity page with the donor list expanded.

    Starts a headless Chrome session, opens ``patronicity_url``, clicks the
    first element whose text contains 'Donors', and then keeps clicking
    'Load More...' for as long as that button is displayed.

    Returns:
        The page source (str) as rendered after the clicks above.

    Raises:
        IndexError: if no element containing 'Donors' is found.
        Any exception raised by selenium propagates to the caller; the
        browser is always shut down first.
    '''
    op = Options()
    for argument in (
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--disable-gpu",
        "--disable-extensions",
        "disable-infobars",
        "start-maximized",
        "--headless",
    ):
        op.add_argument(argument)

    driver = webdriver.Chrome(chrome_driver, options=op)
    # try/finally so a failed page load or click cannot leave a Chrome
    # process behind; the caller retries every minute, so leaks add up fast.
    try:
        driver.get(patronicity_url)

        # Open the list of donors
        buttons = driver.find_elements_by_xpath("//*[contains(text(), 'Donors')]")
        buttons[0].click()
        sleep(4)

        # Only 20 most recent donors get loaded. Find the button to load more
        # and click it until it goes away.
        load_more_xpath = "//*[contains(text(), 'Load More...')]"
        load_button = driver.find_elements_by_xpath(load_more_xpath)
        # NOTE(review): the loop is only entered when MORE than one element
        # matches; presumably the page contains a second match besides the
        # button itself -- confirm before relaxing this to `> 0`.
        more_to_load = len(load_button) > 1
        while more_to_load:
            button = driver.find_elements_by_xpath(load_more_xpath)
            # The button may be hidden or removed from the DOM entirely once
            # everything is loaded; treat both as "done".
            if button and button[0].is_displayed():
                button[0].click()
                sleep(4)
            else:
                more_to_load = False

        return driver.page_source
    finally:
        driver.quit()
def slack_output(contributions, total):
    ''' Announce every new contribution in the #grants_fundraising slack channel.

    contributions -- sequence of (donor, amount) entries, one message is sent per entry
    total         -- total raised so far, inserted into each message as given
    '''
    client = WebClient(SLACK_BOT_TOKEN)
    for entry in contributions:
        donor = entry[0]
        dollars = entry[1]
        client.chat_postMessage(
            channel='#grants_fundraising',
            text=f'New Patronicity donation of {dollars} from {donor}! Total raised is {total}',
        )
def sign_output(contributions, total):
    ''' Publish the most recent contributions to the 16 segment LED sign.

    One MQTT message is published per contribution by running
    ``mosquitto_pub``. Dollar signs and thousands separators are stripped
    from the amounts, and the donor name and total are upper-cased.

    contributions -- sequence of (donor, amount) string pairs
    total         -- total raised so far, e.g. '$12,345'

    Returns None. A failure to start ``mosquitto_pub`` is printed and
    otherwise ignored so that the sign never takes the scraper down.
    '''
    # Loop-invariant, so clean it up once.
    total_text = total.replace('$', '').replace(',', '').upper()

    # If there were multiple contributions detected, only the second one ends up visible
    for c in contributions:
        dollars = c[1].replace('$', '').replace(',', '')
        donor = c[0].upper()
        led_message = f'NEW DONATION OF {dollars} FROM {donor} -- TOTAL RAISED IS {total_text} USD -- '
        # Donor names come straight from a web page, so never hand them to a
        # shell: pass an argument list instead of a formatted command string.
        led_command = [
            'mosquitto_pub',
            '-t', 'cmnd/i3/inside/commons/16seg/display',
            '-m', led_message,
        ]
        try:
            call(led_command)
        except OSError as err:
            # With shell=True a missing binary was only a non-zero exit
            # status; keep it non-fatal now that we exec it directly.
            print(f'Could not run mosquitto_pub: {err}')
def handle_error(num_errors):
    ''' Record a failed connection to patronicity and either back off or give up.

    Every call appends a timestamped line to connection_errors.log.
    Below 120 errors the function prints a retry notice and sleeps for
    60 seconds. At 120 or more it posts a notice to slack, prints a
    final message and exits the program.

    num_errors -- count of errors so far, supplied by the caller
    '''
    error_count = str(num_errors)
    with open('connection_errors.log', 'a+') as log_file:
        now = datetime.now()
        print(f'{now} Error connecting to patronicity.', file=log_file)

    if num_errors < 120:
        # Still within the retry budget: wait a minute before the caller retries.
        print(f'{now} Error connecting to patronicity. Trying again...')
        sleep(60)
        return

    # Retry budget exhausted: tell the channel, then stop the program.
    notifier = WebClient(SLACK_BOT_TOKEN)
    notifier.chat_postMessage(
        channel='#grants_fundraising',
        text=f'Patronicity has been unavailable for {error_count} minutes.',
    )
    print(f'Too many connection errors ({error_count}), exiting.')
    exit()
# Main polling loop.
#
# Once a minute: scrape the campaign page, extract the running total and the
# list of (donor, amount) pairs, announce any contributions that were not in
# the previous poll, append the total to the log and redraw the thermometer.

# State carried from one poll to the next.
contributions_old = []   # donor list from the poll before the last accepted one
contributions_new = []   # donor list from the last accepted poll
num_errors = 0           # consecutive failed page loads
num_donors_old = 0       # len(contributions_old)
num_donors_new = 0       # len(contributions_new)

while True:
    # Pull up the donations page and throw it into BeautifulSoup
    page_html = None
    page_soup = None
    try:
        page_html = get_donors()
        page_soup = BeautifulSoup(page_html, 'lxml')
    except Exception:
        # `Exception` rather than a bare except, so Ctrl-C and SystemExit
        # still stop the script instead of being counted as page errors.
        num_errors += 1
        handle_error(num_errors)
        continue

    # Failed requests return this for valid urls that don't exist and should return 404
    # Probably something to do with selenium that I don't have time to learn now, so
    # this is a hacky way of handling that without fixing the underlying issue
    if '<html xmlns="http://www.w3.org/1999/xhtml"><head></head><body></body></html>' in page_html:
        num_errors += 1
        print('returned blank page')
        handle_error(num_errors)
        continue

    # We got this far, reset the error count
    num_errors = 0

    # Get total amount contributed. Either the stats <div> or the dollar
    # amount inside it may be missing if the page rendered only partially.
    stats_div = page_soup.find("div", {"id": "project-stats"})
    total_match = None
    if stats_div is not None:
        total_match = re.search(r'(\$\d{1,3}(,\d{3})*)', stats_div.text)
    if total_match is None:
        print('Could not find donation total on page.')
        # Don't spam the webserver while retrying
        sleep(60)
        continue
    donation_total = total_match.group(1)

    # The list of donors as html
    donors_html = page_soup.find_all("div", {"class": "donor row"})

    # Extract a list of donor names
    donors_list_html = [x.find('a', {'class': 'ng-binding ng-scope'}) for x in donors_html]
    donors = [x.text if x is not None else 'Anonymous' for x in donors_list_html]

    # Extract a list of donated amounts
    amt_html_list = [x.find('div', {'class': 'lead amount ng-binding ng-scope'}) for x in donors_html]
    amts = [x.text.strip() if x is not None else 'Hidden' for x in amt_html_list]

    # For cases when part or all of the donors list does not load: compare
    # against the most recently accepted count. (Comparing against the count
    # from two polls ago let a partial load through, and the following full
    # load was then reported as new donations.)
    if len(donors) < num_donors_new:
        print('Failed to load all donations. Trying again...')
        # Don't spam the webserver while retrying
        sleep(60)
        continue

    # Keep last copy of the list to compare against. `donors` and `amts` are
    # both built with one entry per element of `donors_html`, so they always
    # have the same length and can be zipped directly.
    contributions_old = contributions_new
    contributions_new = list(zip(donors, amts))
    num_donors_old = num_donors_new
    num_donors_new = len(donors)

    now = datetime.now()

    # If there are new donations and we didn't just start the program, post them
    num_new = num_donors_new - num_donors_old
    if num_new > 0 and contributions_old:
        print(f'{now} {num_new} new contributions found')
        # The newest contributions are taken from the front of the list.
        just_new = contributions_new[:num_new]
        print(just_new)
        slack_output(just_new, donation_total)
        sign_output(just_new, donation_total)
    else:
        print(f'{now} No new contributions. Total is {donation_total} from {len(donors)} donors.')

    with open(total_log, 'a') as f:
        f.write(f'{now}\t{donation_total}\t{len(donors)}\n')

    # Redraw the thermometer with the total as a plain integer ('$12,345' -> 12345)
    make_therm('thermometer.svg', int(donation_total.replace('$', '').replace(',', '')), 50000)

    # Don't spam the webserver
    sleep(60)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment