Skip to content

Instantly share code, notes, and snippets.

@hoefler02
Created March 19, 2020 04:45
Show Gist options
  • Save hoefler02/912ab501072b8e27ab2c231dea350b11 to your computer and use it in GitHub Desktop.
Save hoefler02/912ab501072b8e27ab2c231dea350b11 to your computer and use it in GitHub Desktop.
# Scrapes MCS staff data from public directory
# Used for MHS-Protect app project
import requests, time, json
from bs4 import BeautifulSoup
# Base URL of the public staff directory; the numeric staff ID is appended.
STAFF_URL = 'https://mohigans.mono.k12.wv.us/2/staff/'

# Staff IDs whose pages are invalid and must be skipped
# (see https://mohigans.mono.k12.wv.us/2/staff/568).
INVALID_STAFF_IDS = {568}


def _link_text(field):
    """Return the contents of the first <a> found in *field*'s first child.

    Raises IndexError when the first child contains no <a> element.
    """
    # Re-parse the first child (rather than searching its descendants) so the
    # lookup also works when that child is the <a> element itself.
    fragment = BeautifulSoup(str(field.contents[0]), 'html.parser')
    return fragment.find_all('a')[0].contents[0]


people = []
for teacherID in range(523, 671):
    if teacherID in INVALID_STAFF_IDS:
        continue  # excludes the invalid staff page

    # A timeout keeps a single unresponsive page from hanging the whole run.
    pageSRC = requests.get(STAFF_URL + str(teacherID), timeout=30).text
    # The page's 'field-content' divs are read positionally:
    # name, role, email, phone.
    data = BeautifulSoup(pageSRC, 'html.parser').find_all('div', class_='field-content')
    person = {
        'name': data[0].contents[0],
        'role': data[1].contents[0],
        'email': _link_text(data[2]),
        'phone': _link_text(data[3]),
    }
    people.append(person)
    time.sleep(1)  # pause between requests to go easy on the server

# Open the output only after scraping has finished so a failure mid-run does
# not leave behind a truncated people.json; `with` guarantees the file is
# flushed and closed.
json_data = json.dumps(people, indent=3)
with open('people.json', 'w') as file:
    file.write(json_data)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment