Skip to content

Instantly share code, notes, and snippets.

@awan1
Created December 11, 2020 10:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save awan1/714f9d166bde8e2aadc4c2cb28f4c8da to your computer and use it in GitHub Desktop.
Save awan1/714f9d166bde8e2aadc4c2cb28f4c8da to your computer and use it in GitHub Desktop.
Python code for converting Daystar Eld's "Pokemon: The Origin of Species" to Markdown
# You'll need to `pip install html2text`
import urllib.request
import html2text
def get_text_from_url(url):
    """Fetch a web page and return its content converted to Markdown.

    Args:
        url: Address of the page to download.

    Returns:
        The page body, decoded as UTF-8 and passed through
        ``html2text.html2text``.

    Raises:
        urllib.error.URLError: If the page cannot be fetched.
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    # The context manager closes the response even when read() raises,
    # which a trailing fp.close() call would not.
    with urllib.request.urlopen(url) as fp:
        url_bytes = fp.read()
    url_str = url_bytes.decode("utf8")
    return html2text.html2text(url_str)
# These strings mark the start and end of the chapter, as formatted on daystareld.com
# (they are searched for in the Markdown text produced by html2text, not in raw HTML).
chapter_start_str = "# Chapter"
chapter_end_str = "[](https://mistape.com)"
# First and last chapter numbers to process; both ends are inclusive.
start_chapter = 1
end_chapter = 87
def get_chapter(i):
    """Download chapter ``i`` and return just the chapter body as Markdown.

    The body is the text from ``chapter_start_str`` up to (not including)
    the first ``chapter_end_str`` that follows it.

    Args:
        i: Chapter number, substituted into the chapter URL.

    Returns:
        The chapter text. If the start marker is missing the text begins at
        the top of the page; if the end marker is missing it runs to the end
        of the page, so no content is silently dropped.
    """
    txt = get_text_from_url("http://daystareld.com/pokemon-{}/".format(i))

    # str.find returns -1 when the marker is absent; used directly as a slice
    # bound, -1 would mean "last character" and quietly mangle the result.
    begin = txt.find(chapter_start_str)
    if begin == -1:
        begin = 0

    # Look for the end marker only after the start marker, so an earlier
    # occurrence on the page cannot produce an empty chapter.
    finish = txt.find(chapter_end_str, begin)
    if finish == -1:
        finish = len(txt)

    return txt[begin:finish]
# If you want to write one chapter per file
def write_all_chapters_to_files(first=None, last=None):
    """Download each chapter and save it to its own Markdown file.

    Chapter ``n`` is written to ``pokemon-toos-ch<n>.md`` in the current
    working directory, replacing any existing file of that name.

    Args:
        first: First chapter number to fetch. Defaults to ``start_chapter``.
        last: Last chapter number to fetch (inclusive). Defaults to
            ``end_chapter``.
    """
    if first is None:
        first = start_chapter
    if last is None:
        last = end_chapter

    for i in range(first, last + 1):
        print("Getting chapter {}".format(i))
        txt = get_chapter(i)
        # Explicit UTF-8: the text was decoded from UTF-8 and may contain
        # characters the platform's default encoding cannot represent.
        with open("pokemon-toos-ch{}.md".format(i), "w", encoding="utf-8") as f:
            f.write(txt)
# To combine all chapters into one
def combine_saved_chapters(first=None, last=None):
    """Concatenate previously saved chapter files into one Markdown file.

    Reads ``pokemon-toos-ch<n>.md`` for every chapter in the range and writes
    them, each followed by a blank line, to
    ``pokemon-toos-ch<first>-<last>.md`` in the current working directory.

    Args:
        first: First chapter number to include. Defaults to ``start_chapter``.
        last: Last chapter number to include (inclusive). Defaults to
            ``end_chapter``.

    Raises:
        FileNotFoundError: If a chapter file in the range has not been saved.
    """
    if first is None:
        first = start_chapter
    if last is None:
        last = end_chapter

    chapters = []
    for i in range(first, last + 1):
        # Explicit UTF-8 so reading does not depend on the platform default.
        with open("pokemon-toos-ch{}.md".format(i), "r", encoding="utf-8") as f:
            chapters.append(f.read())

    # Add newlines after every chapter (including the last one); joining once
    # avoids rebuilding an ever-growing string on each iteration.
    complete_text = "".join(chapter + "\n\n" for chapter in chapters)

    with open("pokemon-toos-ch{}-{}.md".format(first, last), "w", encoding="utf-8") as f:
        f.write(complete_text)
# After calling write_all_chapters_to_files() and then combine_saved_chapters(),
# you have a complete Markdown file in `pokemon-toos-ch1-87.md`.
# This can be converted into PDF or EPUB as you like.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment