# gugray/html2txt.py Secret

## html2txt.py
import os
from bs4 import BeautifulSoup

def html2txt(fin, fout):
  """Convert one HTML file into a plain-text file.

  The file at ``fin`` is read as UTF-8 and parsed with BeautifulSoup's
  "html.parser". Every <script> and <style> element is removed from the
  tree, and the text returned by ``get_text()`` is written to ``fout``
  as UTF-8, overwriting any existing file at that path.
  """
  with open(fin, "r", encoding="utf8") as source:
    document = BeautifulSoup(source.read(), "html.parser")
  # Script and style bodies are not page text, so detach them from the
  # tree before the text is collected.
  for tag in document.find_all(["script", "style"]):
    tag.extract()
  plain = document.get_text()
  with open(fout, 'w', encoding="utf8") as sink:
    sink.write(plain)

# Convert every ".html" file found in ./html into a text file in ./txt.
# The output directory is created first so that the first write does not
# fail with FileNotFoundError when ./txt does not exist yet.
os.makedirs("./txt", exist_ok=True)
for filename in os.listdir("./html"):
  if not filename.endswith(".html"):
    continue
  # The output keeps the full input name and appends ".txt"
  # (page.html -> page.html.txt).
  html2txt("./html/" + filename, "./txt/" + filename + ".txt")
	import os
	from bs4 import BeautifulSoup

	def html2txt(fin, fout):
		"""Convert one HTML file into a plain-text file.

		The file at ``fin`` is read as UTF-8 and parsed with BeautifulSoup's
		"html.parser". Every <script> and <style> element is removed, and the
		text returned by ``get_text()`` is written to ``fout`` as UTF-8,
		overwriting any existing file at that path.
		"""
		with open(fin, "r", encoding="utf8") as fi:
			html = fi.read()
			soup = BeautifulSoup(html, "html.parser")
			# Rip out scripts and style so their contents do not end up in the text
			for script in soup(["script", "style"]):
				script.extract()
			text = soup.get_text()
			with open(fout, 'w', encoding="utf8") as fo:
				fo.write(text)

	# Convert every ".html" file found in ./html into a text file in ./txt.
	# The output directory is created first so that the first write does not
	# fail with FileNotFoundError when ./txt does not exist yet.
	os.makedirs("./txt", exist_ok=True)
	for filename in os.listdir("./html"):
		if not filename.endswith(".html"):
			continue
		# The output keeps the full input name and appends ".txt"
		# (page.html -> page.html.txt).
		html2txt("./html/" + filename, "./txt/" + filename + ".txt")