Skip to content

Instantly share code, notes, and snippets.

@rbonick
Created May 6, 2015 23:11
Show Gist options
  • Save rbonick/3d5e3fc4493c499ecfa8 to your computer and use it in GitHub Desktop.
Save rbonick/3d5e3fc4493c499ecfa8 to your computer and use it in GitHub Desktop.
BTW 250 Crawling script
from bs4 import BeautifulSoup
import requests
import time
# Message the counts page shows inside the table when it has no data.
NO_INFO = "We're sorry, there is currently no information available. Please check back later."
# Page that is polled for the facility counts table.
COUNTS_URL = 'https://campusrec.illinois.edu/counts/'
# File that every new snapshot is appended to.
DATA_FILE = 'btw250.dat'
# Seconds to wait between two polls of the page.
POLL_INTERVAL_SECONDS = 1800
# Seconds to wait for the server before giving up on one request.
REQUEST_TIMEOUT_SECONDS = 30
# Timestamp of the most recent snapshot written to DATA_FILE ("" = none yet).
last_updated = ""


def _join_words(text, start):
    """Return words ``start`` and ``start + 1`` of ``text`` glued together.

    ``text`` is split on whitespace. Returns None when it has too few words,
    so callers can skip the entry instead of dying on an IndexError.
    """
    words = text.split()
    if len(words) < start + 2:
        return None
    return words[start] + words[start + 1]


def _row_cells(row):
    """Return the text of each child of ``row`` with non-breaking spaces removed.

    Children whose ``.string`` is None are skipped. Nodes that cannot be
    walked like a table row (anything raising AttributeError here) yield an
    empty list, which the caller ignores.
    """
    try:
        return [cell.string.replace(u'\xa0', u'')
                for cell in row.children
                if cell.string is not None]
    except AttributeError:
        return []


def _report_no_data(table):
    """Print the "no data" notice, with the time taken from the table's first <p>."""
    paragraph = table.find('p')
    text = paragraph.string if paragraph is not None else None
    stamp = _join_words(text, 2) if text else None
    print("There is no data for this runtime " + (stamp or "(unknown time)"))


def _record_counts(table, out, previous_stamp):
    """Append the table's rows to ``out`` if its timestamp is new.

    Rows with three cells are buffered as "a - b - c" lines. A row with a
    single cell is read as the timestamp row: when its timestamp differs from
    ``previous_stamp`` the buffered lines are written out under an
    "Updated at" header, otherwise a "No update" notice is printed.

    Returns the timestamp of the last snapshot written (``previous_stamp``
    when nothing new was written).
    """
    pending = []
    for row in table.children:
        cells = _row_cells(row)
        if len(cells) == 3:
            pending.append(" - ".join(cells))
        elif len(cells) == 1:
            stamp = _join_words(cells[0], 4)
            if stamp is None:
                print("Could not read an update time from: " + cells[0])
                continue
            stamp = stamp.replace("(", "").replace(",", "")
            # Exact comparison: a substring test would treat "2:30PM" as
            # already seen right after "12:30PM".
            if stamp != previous_stamp:
                previous_stamp = stamp
                out.write("Updated at " + stamp + '\n')
                print("Updated at " + stamp)
                for line in pending:
                    out.write(line + '\n')
                out.write('\n')
            else:
                print("No update for time " + stamp)
    return previous_stamp


def _poll_once(previous_stamp):
    """Fetch the counts page once and record it; return the latest timestamp.

    Raises requests.RequestException on network errors, timeouts and HTTP
    error statuses.
    """
    response = requests.get(COUNTS_URL, timeout=REQUEST_TIMEOUT_SECONDS)
    response.raise_for_status()
    # Name the parser explicitly so results do not depend on which optional
    # parsers happen to be installed.
    soup = BeautifulSoup(response.text, 'html.parser')
    table = soup.find(id="rove_table")
    if table is None:
        print("Could not find the counts table on the page")
        return previous_stamp
    if NO_INFO in table.descendants:
        _report_no_data(table)
        return previous_stamp
    with open(DATA_FILE, 'a') as out:
        return _record_counts(table, out, previous_stamp)


def main():
    """Poll the counts page forever, sleeping POLL_INTERVAL_SECONDS between polls."""
    global last_updated
    while True:
        try:
            last_updated = _poll_once(last_updated)
        except requests.RequestException as error:
            # A transient network failure must not kill a long-running poller.
            print("Request failed, will retry next cycle: " + str(error))
        time.sleep(POLL_INTERVAL_SECONDS)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment