Skip to content

Instantly share code, notes, and snippets.

@DastanIqbal
Created March 9, 2018 11:02
Show Gist options
  • Save DastanIqbal/0fd199762f0a247b0dcf0facb5491658 to your computer and use it in GitHub Desktop.
Save DastanIqbal/0fd199762f0a247b0dcf0facb5491658 to your computer and use it in GitHub Desktop.
# This gist extracts data from downloaded HTML files kept in separate folders:
# it fetches one string from each HTML file and uses that string to rename the file.
import shutil
import glob, os
from bs4 import BeautifulSoup
def _extract_roll_no(html):
    """Return the stripped roll-number text found in *html*, or None.

    The value is the first text node of the first <font> tag inside the
    second <td> of the first <tr> of the fifth <table> in the document.
    None is returned when any of those elements is missing.
    """
    tables = BeautifulSoup(html, 'html.parser').find_all('table')
    try:
        cells = tables[4].find_all('tr')[0].find_all('td')
        font = cells[1].find('font')
    except IndexError:
        # Fewer than five tables, no rows, or fewer than two cells.
        return None
    if font is None:
        return None
    text = font.find(string=True)
    if text is None:
        return None
    return text.strip()


# Root folder that holds one sub-folder per batch of downloaded pages.
wdir = os.path.join(os.getcwd(), "result", "Year8k9k")

for fname in ["150-N", "8752-8784", "8790-8891", "8893-8930", "8933-8979", "8983-9150"]:
    # Work inside the batch folder so the relative glob/rename below apply to it.
    os.chdir(os.path.join(wdir, fname))
    for html_name in glob.glob("*.html"):
        with open(html_name, 'r') as content_file:
            content = content_file.read()
        # The file is closed before renaming; renaming a file that is still
        # open can fail on some platforms (e.g. Windows).
        roll_no = _extract_roll_no(content)
        if not roll_no:
            # Unexpected page layout or empty value: leave the file untouched
            # rather than aborting the whole batch or producing 'log_.html'.
            print('skipped ' + html_name + ': roll number not found')
            continue
        shutil.move(html_name, 'log_' + roll_no + '.html')
    # NOTE(review): the original indentation was lost; the progress print and
    # the chdir back to the root are assumed to run once per folder -- confirm.
    print(fname)
    os.chdir(wdir)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment