# mdvsh/mangla_learning.py

## mangla_learning.py
"""Extract school names from the finalists table in a saved HTML page.

Reads ``bhai.html``, finds the ``<table class="stripe">`` element, pulls the
school out of every ``Finalist Name(s)`` cell that mentions one, prints a
preview and writes the result to ``mangla_learning.csv``.
"""
import csv  # noqa: F401  (unused here; retained from the original import list)
import re

import pandas

SOURCE_HTML = 'bhai.html'
OUTPUT_CSV = 'mangla_learning.csv'
FINALIST_COLUMN = 'Finalist Name(s)'


def clean_cell(text):
    """Return *text* with newlines removed and surrounding whitespace stripped."""
    return text.replace('\n', '').strip()


def extract_school(finalist):
    """Return the school portion of a finalist cell.

    Runs of spaces are collapsed, the text between the first ``(`` and the
    first ``)`` is taken, and its first 8 characters are dropped.
    NOTE(review): the 8-character skip presumably removes a ``School: ``
    style label inside the parentheses -- confirm against the real page.
    """
    collapsed = re.sub(' +', ' ', finalist)
    inside_parens = collapsed[collapsed.find("(") + 1:collapsed.find(")")]
    return inside_parens[8:]


def collect_schools(rows):
    """Return the school for every row whose finalist cell mentions 'School'.

    *rows* is an iterable of dicts keyed by column heading. Rows that lack
    the finalist column (fewer cells than headings) are skipped instead of
    raising ``KeyError``.
    """
    return [
        extract_school(row[FINALIST_COLUMN])
        for row in rows
        if 'School' in row.get(FINALIST_COLUMN, '')
    ]


def read_table_rows(html_path):
    """Parse the ``stripe`` table in *html_path* into a list of row dicts.

    Each dict maps a header cell's cleaned text to the cleaned text of the
    cell in the same position of a body row.

    Raises:
        ValueError: if the page contains no ``<table class="stripe">``.
    """
    # Imported here so the pure text helpers above stay importable on
    # machines that do not have bs4/lxml installed.
    from bs4 import BeautifulSoup

    # BeautifulSoup reads the whole file inside its constructor, so the
    # handle can be closed as soon as the soup is built.
    with open(html_path, 'r') as src:
        soup = BeautifulSoup(src, 'lxml')

    table = soup.find("table", attrs={'class': 'stripe'})
    if table is None:
        raise ValueError(
            "no <table class='stripe'> found in {}".format(html_path)
        )

    header_row = table.thead.find_all("tr")[0]
    heading = [clean_cell(th.text) for th in header_row.find_all("th")]

    # zip() stops at the shorter side, so short rows simply get fewer keys.
    return [
        dict(zip(heading, (clean_cell(td.text) for td in tr.find_all("td"))))
        for tr in table.tbody.find_all("tr")
    ]


def main():
    """Scrape the schools, print a preview and write them to CSV."""
    schools = collect_schools(read_table_rows(SOURCE_HTML))
    df = pandas.DataFrame(schools)
    # head is a method; without the call only its bound-method repr prints.
    print(df.head())
    df.to_csv(OUTPUT_CSV)


if __name__ == "__main__":
    main()