Skip to content

Instantly share code, notes, and snippets.

@ColdHeat
Last active July 24, 2020 19:21
Show Gist options
  • Save ColdHeat/085c47359ab86c18864135a198cbe505 to your computer and use it in GitHub Desktop.
Save ColdHeat/085c47359ab86c18864135a198cbe505 to your computer and use it in GitHub Desktop.
Script to help update CTFd Pages to CommonMark
import re
import subprocess
import tempfile
import cmarkgfm
import mistune
from bs4 import BeautifulSoup
from sqlalchemy import create_engine
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import sessionmaker
from distutils.util import strtobool
import sys
def prompt(query):
    """Ask a yes/no question on stdout and return the answer as a bool.

    The question is re-asked until a recognisable answer is read from stdin.
    The accepted answers (case-insensitive) are the same tokens that
    ``distutils.util.strtobool`` understood, so existing habits keep working
    while the function itself no longer calls into ``distutils`` (removed
    from the standard library in Python 3.12).

    Args:
        query: Text of the question; ``" [y/n]: "`` is appended to it.

    Returns:
        True for an affirmative answer, False for a negative one.
    """
    truthy = {"y", "yes", "t", "true", "on", "1"}
    falsy = {"n", "no", "f", "false", "off", "0"}

    # Loop instead of recursing so repeated bad answers cannot grow the stack.
    while True:
        sys.stdout.write("%s [y/n]: " % query)
        sys.stdout.flush()
        val = input().strip().lower()
        if val in truthy:
            return True
        if val in falsy:
            return False
        # Trailing newline keeps the hint from running into the next prompt.
        sys.stdout.write("Please answer with y/n\n")
def old(md):
    """Render *md* to HTML with a default-configured ``mistune.Markdown``."""
    legacy_renderer = mistune.Markdown()
    rendered = legacy_renderer(md)
    return rendered
def new(md):
    """Render *md* to HTML with cmarkgfm.

    The ``autolink``, ``table`` and ``strikethrough`` extensions are enabled.
    """
    enabled_extensions = ["autolink", "table", "strikethrough"]
    return cmarkgfm.markdown_to_html_with_extensions(
        md, extensions=enabled_extensions
    )
def regex_add_newline(match):
    """Replacement callback for ``re.sub``: the matched text with a newline in front.

    Args:
        match: A regular-expression match object.

    Returns:
        The full matched text prefixed with ``"\\n"``.
    """
    matched_text = match.group(0)
    return "\n" + matched_text
def fix_html(html):
    """Reformat *html* with the external ``html-beautify`` command.

    The text is sent to the command on stdin and its stdout is returned.

    Args:
        html: Markup to reformat.

    Returns:
        Whatever ``html-beautify --max_preserve_newlines=0`` printed on stdout.

    Raises:
        FileNotFoundError: ``html-beautify`` is not on PATH.
        subprocess.CalledProcessError: the command exited with a non-zero
            status.
    """
    # check=True: without it a failed run yields empty stdout, and a caller
    # that stores the result would silently replace the content with "".
    out = subprocess.run(
        ["html-beautify", "--max_preserve_newlines=0"],
        capture_output=True,
        text=True,
        input=html,
        check=True,
    )
    return out.stdout
def fix_markdown_with_html(html):
    """Re-space Markdown that embeds HTML, returning the adjusted text.

    Steps, in order:
      1. Parse the input with BeautifulSoup (``html.parser``) and prettify it.
         The intent is to drop stray newlines that would end an HTML block
         prematurely.
      2. Append a newline after every line that starts with ``</div>``
         (only ``div`` is handled; other tags are not).
      3. Put a newline in front of every line that starts with ``<`` followed
         by a lowercase letter and contains a ``>``, so opening block tags
         get a padding line before them.
    """
    closing_div = re.compile(r"^</div>", re.MULTILINE)
    opening_tag = re.compile(r"^<[a-z](.*)>", re.MULTILINE)

    prettified = BeautifulSoup(html, "html.parser").prettify()
    with_div_padding = closing_div.sub("</div>\n", prettified)
    return opening_tag.sub(regex_add_newline, with_div_padding)
# Interactive driver: for every row of the reflected ``pages`` table, show the
# stored content, ask whether it is HTML or Markdown, preview the old (mistune)
# rendering against the new (cmarkgfm) rendering of the fixed content, and
# stage the fixed content on the row. Nothing is committed until the final
# confirmation.

# Connect to the database
Base = automap_base()
# "sqlite:///CTFd/CTFd.db"
engine = create_engine(input("Enter database url: "))
Base.prepare(engine, reflect=True)
Session = sessionmaker(bind=engine)
session = Session()
Pages = Base.classes.pages

# Get all pages
pages = session.query(Pages).all()
for p in pages:
    # Page the raw content so the operator can judge its format. A plain
    # argument list needs no shell, so shell=True is not used.
    subprocess.run(["less"], text=True, input=p.content)
    t = input("Is this html or markdown? ")
    cmd = fix_html if t == "html" else fix_markdown_with_html

    # Run the chosen fixer once; the same result is previewed and stored, so
    # what the operator reviews is exactly what may get committed.
    fixed_content = cmd(p.content)

    with tempfile.NamedTemporaryFile(
        suffix=".html"
    ) as temp1, tempfile.NamedTemporaryFile(suffix=".html") as temp2:
        temp1.write(bytes(old(p.content), encoding="utf-8"))
        temp1.flush()

        temp2.write(bytes(new(fixed_content), encoding="utf-8"))
        temp2.flush()

        # Show textual diff
        subprocess.call(["vimdiff", temp1.name, temp2.name])

        # Generate image of previous HTML
        subprocess.call(
            [
                "wkhtmltoimage",
                "--load-error-handling",
                "ignore",
                f"file://{temp1.name}",
                "temp1.png",
            ]
        )

        # Generate image of new HTML
        subprocess.call(
            [
                "wkhtmltoimage",
                "--load-error-handling",
                "ignore",
                f"file://{temp2.name}",
                "temp2.png",
            ]
        )

        # Generate diff image
        subprocess.call(
            [
                "compare",
                "-identify",
                "-metric",
                "MAE",
                "temp1.png",
                "temp2.png",
                "diff.png",
            ]
        )

        # Combine all images together
        subprocess.call(
            ["convert", "+append", "temp1.png", "temp2.png", "diff.png", "temp.png"]
        )

        # Open and show to user
        subprocess.call(["open", "temp.png"])

    # Update content
    # NOTE(review): this used to store fix_html(p.content) regardless of the
    # answer above, so Markdown pages were saved with a different fix than the
    # one previewed. Confirm that storing the previewed text is the intent.
    p.content = fixed_content

    # The typed text is irrelevant; this only pauses until Enter is pressed.
    input("Waiting for input to move to next page...")
    print("=" * 20)
    print("\n" * 3)

confirm = prompt("Are all changes good? This will commit the changes to the db.")
print(confirm)
if confirm is True:
    session.commit()
else:
    print("okay skipping...")
yarn global add js-beautify
mistune==0.8.4
beautifulsoup4==4.9.1
SQLAlchemy==1.3.18
cmarkgfm==0.4.2
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment