Skip to content

Instantly share code, notes, and snippets.

@mdvsh
Created October 16, 2020 16:43
Show Gist options
  • Save mdvsh/11a4a5b14977d25f443d37ea005b6897 to your computer and use it in GitHub Desktop.
Save mdvsh/11a4a5b14977d25f443d37ea005b6897 to your computer and use it in GitHub Desktop.
isef scraper
"""Extract finalist school names from a saved results page into a CSV.

Reads ``bhai.html``, turns the first ``<table class="stripe">`` into a list
of row dicts keyed by column heading, pulls the school text out of every
``Finalist Name(s)`` cell that mentions a school, prints a short preview and
writes the result to ``mangla_learning.csv``.
"""
import csv  # noqa: F401 - retained from the original script; currently unused
import re

import pandas

SOURCE_HTML = "bhai.html"
OUTPUT_CSV = "mangla_learning.csv"
NAME_COLUMN = "Finalist Name(s)"

# Number of leading characters dropped from the parenthesised text.
# NOTE(review): presumably the length of a "School: " label - confirm
# against the actual page content.
_LABEL_LENGTH = 8

# Compiled once; used to collapse runs of spaces left over from the HTML.
_SPACE_RUN = re.compile(r" +")


def _clean(text: str) -> str:
    """Return *text* with newlines removed and outer whitespace stripped."""
    return text.replace("\n", "").strip()


def extract_school(cell: str) -> str:
    """Return the school portion of a ``Finalist Name(s)`` cell.

    Runs of spaces are collapsed to one, the text between the first ``(``
    and the first ``)`` is taken, and its first ``_LABEL_LENGTH`` characters
    are discarded.

    No validation is done: if either parenthesis is missing, ``str.find``
    returns -1 and the slice bounds fall back accordingly, exactly as in the
    original script.
    """
    collapsed = _SPACE_RUN.sub(" ", cell)
    inside = collapsed[collapsed.find("(") + 1:collapsed.find(")")]
    return inside[_LABEL_LENGTH:]


def collect_schools(rows) -> list:
    """Return the extracted school for every row whose name cell mentions one.

    *rows* is an iterable of dicts that each contain the ``Finalist Name(s)``
    key; rows whose cell does not contain the substring ``School`` are
    skipped.

    Raises:
        KeyError: if a row has no ``Finalist Name(s)`` entry.
    """
    return [
        extract_school(row[NAME_COLUMN])
        for row in rows
        if "School" in row[NAME_COLUMN]
    ]


def read_table(path: str) -> list:
    """Parse the ``stripe`` table in the HTML file at *path* into row dicts.

    Column names come from the ``<th>`` cells of the first header row. Each
    body row becomes a dict mapping heading to cleaned cell text; ``zip``
    stops at the shorter sequence, so extra cells or headings are ignored.
    """
    # Imported here so the pure text helpers above stay importable on
    # machines that do not have the HTML parsing stack installed.
    from bs4 import BeautifulSoup

    # The context manager closes the file handle once the document has
    # been parsed (the original script left it open).
    with open(path, "r") as src:
        soup = BeautifulSoup(src, "lxml")

    table = soup.find("table", attrs={"class": "stripe"})
    header_rows = table.thead.find_all("tr")
    headings = [_clean(th.text) for th in header_rows[0].find_all("th")]

    return [
        {
            heading: _clean(td.text)
            for td, heading in zip(tr.find_all("td"), headings)
        }
        for tr in table.tbody.find_all("tr")
    ]


def main() -> None:
    """Run the scrape: parse the page, preview the schools, write the CSV."""
    schools = collect_schools(read_table(SOURCE_HTML))
    df = pandas.DataFrame(schools)
    # Call head(); printing the bare attribute shows the bound-method repr.
    print(df.head())
    df.to_csv(OUTPUT_CSV)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment