Skip to content

Instantly share code, notes, and snippets.

@jalbertbowden
Last active December 16, 2020 18:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jalbertbowden/ecacdd6e867c9f6d81a118b69edda25d to your computer and use it in GitHub Desktop.
Save jalbertbowden/ecacdd6e867c9f6d81a118b69edda25d to your computer and use it in GitHub Desktop.
scrape longwood covid data
Active Cases Total Cumulative Cases Total Students Total Faculty Date
import requests
from bs4 import BeautifulSoup
import json
import re
import csv
def get_date_updated(soup, date_months):
    """Return the page's "last updated" date as a comma-free date string.

    Reads the text of the first ``<p class="lastUpdated">`` element, takes the
    segment that follows the first colon, and splits it on single spaces. The
    first three tokens are reordered as ``third first second`` before being
    handed to ``format_date``; commas are then removed from the formatted
    date.

    Args:
        soup: Parsed page exposing ``findAll`` (a BeautifulSoup document in
            this script).
        date_months: Month spellings, forwarded unchanged to ``format_date``.

    Returns:
        The first item returned by ``format_date`` with every ``,`` removed.
    """
    # NOTE(review): presumably the text looks like
    # "Last updated: Dec. 16, 2020" -- confirm against the live page.
    stamp_text = soup.findAll('p', {'class': 'lastUpdated'})[0].text
    tokens = stamp_text.split(':')[1].strip().split(' ')

    # Move the third token to the front so the month lands in second position
    # and the day in third, which is where format_date reads them.
    reordered = ' '.join([tokens[2], tokens[0], tokens[1]])

    formatted = format_date(reordered, date_months)
    return formatted[0].replace(',', '')
def format_date(var, date_months, year='2020'):
    """Convert a space-separated date string into a ``YYYY-MM-DD``-style string.

    ``var`` is split on single spaces. Its second token is looked up in
    ``date_months`` to obtain the two-digit month number, and its third token
    is used verbatim as the day: it is not zero-padded, and trailing
    punctuation such as a comma is kept (callers strip it themselves).

    Args:
        var: String with at least three space-separated tokens, read as
            ``<first> <month> <day>``.
        date_months: Sequence of month spellings in calendar order (January
            first). Only the first twelve entries are considered.
        year: Year used as the prefix of the result. Defaults to ``'2020'``,
            the value that was previously hard-coded.

    Returns:
        A two-item list ``[date_string, first_token]`` where ``date_string``
        is ``"<year>-<MM>-<day token>"`` and ``first_token`` is the first
        token of ``var``.

    Raises:
        ValueError: If the month token does not appear in ``date_months``.
        IndexError: If ``var`` has fewer than three tokens.
    """
    tokens = var.split(' ')
    month_token = tokens[1]

    # A position lookup replaces the twelve-branch if/elif chain. An unknown
    # month used to leave the month variable unassigned and crash later with
    # an UnboundLocalError; report it explicitly instead.
    try:
        month_number = list(date_months[:12]).index(month_token) + 1
    except ValueError:
        raise ValueError(
            'unrecognised month {!r} in date {!r}'.format(month_token, var)
        ) from None

    date_form = '{}-{:02d}-{}'.format(year, month_number, tokens[2])
    return [date_form, tokens[0]]
def csv_append_row(csv_name, csv_row, csv_headers):
    """Append a single record to a CSV file.

    The file is opened in append mode, so it is created when missing and
    existing rows are kept. No header row is written by this function.

    Args:
        csv_name: Path of the CSV file to append to.
        csv_row: Mapping of column name to value for the new row.
        csv_headers: Column names, in output order; passed to
            ``csv.DictWriter`` as ``fieldnames``.

    Raises:
        ValueError: If ``csv_row`` contains a key that is not listed in
            ``csv_headers`` (``csv.DictWriter``'s default behaviour).
    """
    # newline='' hands line-ending control to the csv module; without it the
    # writer's "\r\n" terminator is translated again on Windows, producing
    # "\r\r\n" and blank lines between rows.
    with open(csv_name, 'a', newline='') as fd:
        writer = csv.DictWriter(fd, fieldnames=csv_headers)
        writer.writerow(csv_row)
# csv_append_row(csv_name, my_list, csv_headers)
def scrape_longwood():
    """Scrape the Longwood COVID-19 dashboard and append one row to a CSV.

    Downloads the dashboard page, reads four summary figures and the page's
    "last updated" date, and appends them as a single row to
    ``longwood-covid-19.csv`` in the current working directory.

    Values are written exactly as they appear in the page text (no stripping
    or numeric conversion).

    Raises:
        requests.RequestException: If the page cannot be fetched within the
            timeout or the server answers with an HTTP error status.
        IndexError: If the page lacks the elements this function indexes
            into (first table, its ``<strong>`` tags, ``div.value`` blocks).
    """
    csv_headers = ["Active Cases", "Total Cumulative Cases", "Total Students", "Total Faculty", "Date"]
    date_months = ['Jan.', 'Feb.', 'Mar.', 'Apr.', 'May', 'Jun.', 'Jul.', 'Aug.', 'Sep.', 'Oct.', 'Nov.','Dec.']
    url = 'http://www.longwood.edu/covid19/dashboard/'
    csv_name = 'longwood-covid-19.csv'

    # A timeout keeps the script from hanging on an unresponsive server, and
    # raise_for_status() stops an HTTP error page from being parsed as data.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # The first two <strong> elements of the first table feed the
    # "Total Students" and "Total Faculty" columns, in that order.
    totals_strong = soup.findAll('table')[0].find_all('strong')
    total_students = totals_strong[0].text
    total_faculty = totals_strong[1].text

    # The first two <div class="value"> elements feed "Active Cases" and
    # "Total Cumulative Cases", in that order.
    summary_values = soup.findAll('div', {'class': 'value'})
    total_active_cases = summary_values[0].text
    total_cumulative_confirmed_cases = summary_values[1].text

    # NOTE: the second table (td.caseValue / td.reportDate cells) is not
    # exported; earlier code that read it was disabled and never used.
    new_row = {
        'Active Cases': total_active_cases,
        'Total Cumulative Cases': total_cumulative_confirmed_cases,
        'Total Students': total_students,
        'Total Faculty': total_faculty,
        'Date': get_date_updated(soup, date_months),
    }
    csv_append_row(csv_name, new_row, csv_headers)
# Script entry point: perform one scrape and append one CSV row.
# NOTE: there is no `if __name__ == "__main__"` guard, so this also runs when
# the file is imported as a module.
scrape_longwood()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment