Skip to content

Instantly share code, notes, and snippets.

@aoloe
Created October 23, 2014 08:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aoloe/1b68d61c027e97f38d52 to your computer and use it in GitHub Desktop.
Save aoloe/1b68d61c027e97f38d52 to your computer and use it in GitHub Desktop.
import html2text
import csv
import unicodedata
#from unidecode import unidecode
#print unidecode(u"\u5317\u4EB0")
import sys
# Python 2-only idiom: sys.setdefaultencoding is removed from the sys namespace
# at interpreter start-up, so sys is reloaded to get it back, and the implicit
# str <-> unicode codec is then switched to UTF-8 for the whole process.
# NOTE(review): reload() is not a builtin on Python 3, so this script runs on
# Python 2 only as written.
reload(sys)
sys.setdefaultencoding('utf-8')
# HTML -> Markdown converter. It is only referenced by the commented-out calls
# below; nothing is converted yet.
h = html2text.HTML2Text()
# Ignore converting links from HTML
#h.ignore_links = True
#print h.handle("<p>Hello, <a href='http://earth.google.com/'>world</a>!")

# Walk the table of contents. Column 0 is the page id used to build the name of
# the saved page; column 1 is presumably the page title (verify against
# toc.csv). The file is opened in binary mode, as the Python 2 csv module
# expects.
with open("toc.csv", "rb") as csvfile:
    reader = csv.reader(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    for row in reader:
        # print(row)
        # csv.reader yields [] for blank lines, and a short row has no second
        # column; skip those instead of aborting the run with an IndexError.
        if len(row) < 2:
            continue
        filename = "index.php?id=" + row[0]
        print(filename)
        # Named `page` rather than `file` so the Python 2 builtin is not
        # shadowed. The handle is closed as soon as the content is read.
        with open(filename, "r") as page:
            content = page.read()
        #print(h.handle(content))
        # Build the Markdown file name from column 1: spaces become
        # underscores; '!', ':' and apostrophes are removed.
        markdown_filename = row[1].replace(' ', '_')
        for unwanted in "!:'":
            markdown_filename = markdown_filename.replace(unwanted, '')
        print(markdown_filename)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment