Skip to content

Instantly share code, notes, and snippets.

@SubhadityaMukherjee
Created November 13, 2022 12:13
Show Gist options
  • Save SubhadityaMukherjee/c77ad93b8741d464e60adc17807171b2 to your computer and use it in GitHub Desktop.
Save SubhadityaMukherjee/c77ad93b8741d464e60adc17807171b2 to your computer and use it in GitHub Desktop.
Website Formatter
import clipboard
import os
from pathlib import Path
import re
def apply_transforms(txt, list_of_trans):
    """Run *txt* through each callable in *list_of_trans*, in order.

    Args:
        txt: The text to transform.
        list_of_trans: Iterable of one-argument callables; each receives the
            output of the previous one.

    Returns:
        The result of the last transform, or *txt* unchanged when
        *list_of_trans* is empty.
    """
    for trans in list_of_trans:
        txt = trans(txt)
    return txt
def replacer(txt, dict_of_replace):
    """Apply literal substring replacements to *txt*.

    Replacements are applied one after another in the mapping's iteration
    order, so an earlier entry can consume text that a later entry would
    otherwise have matched.

    Args:
        txt: The text to rewrite.
        dict_of_replace: Mapping of literal substring -> replacement string.

    Returns:
        The text after every replacement has been applied.
    """
    for old, new in dict_of_replace.items():
        txt = txt.replace(old, new)
    return txt
def regexreplacer(txt, dict_of_replace):
    """Apply regular-expression replacements to *txt*.

    Each key is passed to ``re.sub`` as the pattern and its value as the
    replacement, so backslash escapes and group references in the value are
    interpreted by ``re.sub``. Entries are applied sequentially in the
    mapping's iteration order.

    Args:
        txt: The text to rewrite.
        dict_of_replace: Mapping of regex pattern -> replacement string.

    Returns:
        The text after every substitution has been applied.
    """
    for pattern, replacement in dict_of_replace.items():
        txt = re.sub(pattern, replacement, txt)
    return txt
def indent_transform(txt):
    """Strip leading and trailing whitespace from every line of *txt*.

    Lines are delimited by ``"\\n"`` only; the number of lines is preserved,
    so blank or whitespace-only lines become empty lines.

    Args:
        txt: The text whose lines should be stripped.

    Returns:
        The stripped lines re-joined with ``"\\n"``.
    """
    return "\n".join(line.strip() for line in txt.split("\n"))
def paragraph_converter(txt):
    """Prefix plain lines of *txt* with ``"- "``.

    A line is left untouched when it is empty or when its first character is
    ``"#"`` or a space; every other line gets ``"- "`` prepended.

    Args:
        txt: Newline-separated text.

    Returns:
        The text with the prefix applied line by line, joined with ``"\\n"``.
    """
    # str.startswith accepts a tuple, and "".startswith(...) is False, so the
    # explicit emptiness check is what keeps blank lines blank.
    skip_prefixes = ("#", " ")
    return "\n".join(
        line if not line or line.startswith(skip_prefixes) else "- " + line
        for line in txt.split("\n")
    )
# Literal substring -> replacement table. Entries are listed in the order
# they are meant to be applied; several keys overlap (e.g. "- **" / "**" and
# ")>" / ">"), so the more specific key must stay ahead of the general one.
dict_of_replace = {
    # --- Markdown emphasis ---
    # "*": "",
    "- **": "# ",
    "**": "",
    "****": "",
    # --- Inline math delimiters ---
    "[latex]": "$",
    "[/latex]": "$",
    "<math>": "$",
    "</math>": "$",
    "__": "",
    # --- HTML headings ---
    "<h3>": "# [[",
    "</h3>": "]]",
    "<h1>": "[[",
    "</h1>": "]]",
    # "<h2>": "[[",
    # "</h2>": "]]",
    "<h2>": "# ",
    "</h2>": "",
    # --- Curly double quotes -> straight single quote ---
    "“": "'",
    "”": "'",
    # ". ": "\n",
    # ":": "\n- ",
    # --- Wikipedia specific stuff ---
    ")>": "",
    # ") ": " ",
    "#cite_note": " ",
    "(<https://en.wikipedia.org/wiki/": " ",
    "(https://en.wikipedia.org/wiki/": " ",
    "(<https://en.wikipedia.org/w/index.php?title=": " ",
    "(https://en.wikipedia.org/w/index.php?title=": " ",
    "s&action=edit&redlink=1": " ",
    # --- Pdfannots ---
    # Kept last: every other key containing ">" must be handled first.
    ">": "-",
}
# Regex pattern -> replacement table, for clean-ups that a literal
# substring cannot express.
dict_regex_replace = {
    # wiki links: drops any "[...]" span (shortest match), brackets included
    r'\[.*?\]': "",
    # pdfannots page numbers: drops " * Page #<anything>:" markers
    r' \* Page #.*\:': "",
}
# Pipeline: clipboard -> literal replacements -> regex replacements ->
# per-line clean-up -> clipboard.
# NOTE(review): the regex stage runs after the literal stage, so the "[[" /
# "]]" emitted for <h1>/<h3> appear to be eaten by the r'\[.*?\]' pattern
# (e.g. "# [[Title]]" -> "# ]"). Confirm whether that ordering is intended.
text = clipboard.paste()
text = regexreplacer(replacer(text, dict_of_replace), dict_regex_replace)
line_transforms = [indent_transform, paragraph_converter]
text = apply_transforms(text, line_transforms)
clipboard.copy(text)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment