stevepowell99/extractDivs.py

## extractDivs.py
#!/usr/bin/env python


import poppler, os.path, os, time, datetime
from bs4 import BeautifulSoup

# Merge every chapter linked from ./output/index.html, followed by the
# articles each chapter bookmarks, into one HTML file: ./output/all.html.

root = "./output/"


def _load_soup(path):
    """Parse the HTML file at *path*, closing the file handle afterwards."""
    # NOTE: no parser is named, so bs4 keeps choosing the best one installed,
    # as the original call did. Pin one (e.g. "html.parser") if the merged
    # output must be identical across machines.
    with open(path) as handle:
        return BeautifulSoup(handle)


def _section(heading, page):
    """Return the title and <article> of *page* as one HTML fragment.

    *heading* is the tag name ("h1" or "h2") wrapped around the element
    whose id is "page-title". A missing title or article is rendered as
    the text "None", because the lookup result is passed through str().
    """
    title = page.find(id="page-title")
    body = page.find("article")
    return (
        "<" + heading + ">" + str(title) + "</" + heading + ">"
        + "<article  style='text-align:left'>" + str(body) + "</article>"
    )


parts = []
index = _load_soup(root + "index.html")

# Chapter links are the index elements whose `typ` attribute is "chapters".
for link in index.find_all(typ='chapters'):
    href = link.get('href')
    if not href:
        # Without an href there is no chapter file to read.
        continue
    chapter = _load_soup(root + href)
    parts.append(_section("h1", chapter))

    # find_all() returns an empty list when nothing matches, so no None
    # check is needed before iterating.
    for mark in chapter.find_all("a", rel="bookmark"):
        target = mark.get('href')
        if not target:
            continue
        # The first three characters of the href are dropped before it is
        # appended to the output directory (presumably a "../" prefix --
        # TODO confirm against the generated pages).
        parts.append(_section("h2", _load_soup(root + target[3:])))

# Write only after every page has been merged, so a failure part-way through
# does not truncate an existing all.html; `with` guarantees the close that
# the bare `g.close` expression never performed.
with open(root + "all.html", "w") as g:
    g.write("".join(parts))
	#!/usr/bin/env python




	import poppler, os.path, os, time, datetime
	from bs4 import BeautifulSoup

	# Merge every chapter linked from ./output/index.html, followed by the
	# articles each chapter bookmarks, into one HTML file: ./output/all.html.

	root = "./output/"


	def _load_soup(path):
	    """Parse the HTML file at *path*, closing the file handle afterwards."""
	    # NOTE: no parser is named, so bs4 keeps choosing the best one
	    # installed, as the original call did. Pin one (e.g. "html.parser")
	    # if the merged output must be identical across machines.
	    with open(path) as handle:
	        return BeautifulSoup(handle)


	def _section(heading, page):
	    """Return the title and <article> of *page* as one HTML fragment.

	    *heading* is the tag name ("h1" or "h2") wrapped around the element
	    whose id is "page-title". A missing title or article is rendered as
	    the text "None", because the lookup result is passed through str().
	    """
	    title = page.find(id="page-title")
	    body = page.find("article")
	    return (
	        "<" + heading + ">" + str(title) + "</" + heading + ">"
	        + "<article style='text-align:left'>" + str(body) + "</article>"
	    )


	parts = []
	index = _load_soup(root + "index.html")

	# Chapter links are the index elements whose `typ` attribute is "chapters".
	for link in index.find_all(typ='chapters'):
	    href = link.get('href')
	    if not href:
	        # Without an href there is no chapter file to read.
	        continue
	    chapter = _load_soup(root + href)
	    parts.append(_section("h1", chapter))

	    # find_all() returns an empty list when nothing matches, so no None
	    # check is needed before iterating.
	    for mark in chapter.find_all("a", rel="bookmark"):
	        target = mark.get('href')
	        if not target:
	            continue
	        # The first three characters of the href are dropped before it is
	        # appended to the output directory (presumably a "../" prefix --
	        # TODO confirm against the generated pages).
	        parts.append(_section("h2", _load_soup(root + target[3:])))

	# Write only after every page has been merged, so a failure part-way
	# through does not truncate an existing all.html; `with` guarantees the
	# close that the bare `g.close` expression never performed.
	with open(root + "all.html", "w") as g:
	    g.write("".join(parts))