Skip to content

Instantly share code, notes, and snippets.

Last active January 31, 2022 16:55
Show Gist options
  • Save sk-t3ch/71c480017d844789841978d43c9be5b0 to your computer and use it in GitHub Desktop.
Save sk-t3ch/71c480017d844789841978d43c9be5b0 to your computer and use it in GitHub Desktop.
Medium-to-Markdown conversion script for the service
import re
from datetime import datetime
import json
from io import BytesIO
import requests
def MediumToMarkdownBuilder(request_get):
    """Build a converter that turns a Medium article URL into Markdown.

    Args:
        request_get: Callable used for every HTTP GET (``requests.get`` in
            this file). It is passed on to ``load_medium_json`` and
            ``process_paragraph``, so a stub can be injected instead.

    Returns:
        A function ``_func(url)`` returning the article as a Markdown string.
    """

    def _func(url):
        """Fetch the article at ``url`` and return its Markdown rendering.

        Raises:
            Exception: if the fetched payload's ``type`` is not ``"Post"``.
        """
        medium_post = load_medium_json(request_get)(url)
        if medium_post.get("type") != "Post":
            raise Exception('Not a Medium Article')

        story = {"title": medium_post["title"]}

        # NOTE(review): the argument of the original ``fromtimestamp`` call is
        # missing from this file. This assumes a ``createdAt`` field holding
        # epoch milliseconds - confirm. The value is not used for the output.
        created_at = medium_post.get("createdAt")
        if isinstance(created_at, (int, float)):
            story["date"] = datetime.fromtimestamp(created_at / 1000).isoformat()
        else:
            story["date"] = None

        body_model = medium_post["content"]["bodyModel"]
        story["sections"] = body_model["sections"]
        story["paragraphs"] = body_model["paragraphs"]
        sections = [process_section(s) for s in story["sections"]]

        if len(story["paragraphs"]) > 1:
            story["subtitle"] = story["paragraphs"][1]["text"]

        # str.replace matches literally, so the previous JavaScript-style
        # patterns (r'/\n/g', r'/#+/') never matched anything.
        story["markdown"] = ["# " + story["title"].replace("\n", "\n# ")]
        subtitle = story.get("subtitle")
        if subtitle:
            # The pattern had no global flag, so only the first run of '#'
            # characters is removed.
            story["markdown"].append("\n" + re.sub(r"#+", "", subtitle, count=1))

        render_paragraph = process_paragraph(request_get)
        sections_len = len(sections)
        for para_idx, paragraph in enumerate(story["paragraphs"]):
            if para_idx < 2:
                # The first two paragraphs are skipped; the heading and
                # subtitle lines were already emitted above.
                continue
            # NOTE(review): sections are matched to paragraphs by list
            # position here; the body of this branch is missing from the
            # file, so confirm this is the intended pairing.
            if para_idx < sections_len and sections[para_idx]:
                story["markdown"].append(sections[para_idx])
            story["markdown"].append(render_paragraph(paragraph))

        return '\n'.join(story["markdown"])

    return _func
def load_medium_json(fetch):
    """Build a loader that fetches a URL as JSON and unwraps its payload.

    Args:
        fetch: Callable taking a URL and returning an object with a ``text``
            attribute (``requests.get`` in this file).

    Returns:
        A function ``_func(url)`` returning ``payload["value"]`` from the
        decoded JSON response.
    """

    def _func(url):
        """Request ``url`` with ``format=json`` and return ``payload.value``.

        Raises:
            ValueError: if the response contains no ``{`` or the remainder
                is not valid JSON.
            KeyError: if the decoded object lacks ``payload`` or ``value``.
        """
        # Use '&' when the URL already has a query string; blindly appending
        # '?format=json' would yield an invalid '...?a=b?format=json'.
        separator = "&" if "?" in url else "?"
        response = fetch(f"{url}{separator}format=json")
        text = response.text

        # Anything before the first '{' is skipped before decoding.
        json_start = text.find("{")
        if json_start == -1:
            raise ValueError(f"No JSON object found in response for {url!r}")
        return json.loads(text[json_start:])["payload"]["value"]

    return _func
def process_section(s):
    """Return a Markdown image line for a section's background image.

    Args:
        s: Section dict; only its optional ``backgroundImage`` entry
            (with ``originalWidth`` and ``id``) is read.

    Returns:
        ``"\\n![](<src>)"`` when a background image is present, else ``""``.
        ``MEDIUM_IMG_CDN`` must be defined at module level; it is not
        visible in this part of the file.
    """
    image = s.get("backgroundImage", False)
    if not image:
        return ""

    width = int(image["originalWidth"])
    # Request twice the original width, but never less than 2000.
    src = f"{MEDIUM_IMG_CDN}{max(width * 2, 2000)}/{image['id']}"
    return "\n![](" + src + ")"
# NOTE(review): the domain literals in this file are blank, which made the
# YouTube branch unreachable (both conditions compared against ""). The
# values below are a reconstruction - confirm against the ``domain`` field
# of real embed payloads.
_GITHUB_EMBED_DOMAINS = ("gist.github.com", "github.com")
_YOUTUBE_EMBED_DOMAINS = ("youtube.com", "www.youtube.com")


def get_embed(fetch):
    """Build a resolver that renders an embedded media resource.

    Args:
        fetch: HTTP GET callable, forwarded to ``load_medium_json`` and
            ``get_GitHub_embed``.

    Returns:
        A function ``_func(url)`` that loads the embed description at
        ``url`` and returns its rendering, or ``""`` when the embed's
        domain is not handled.
    """

    def _func(url):
        embed_json = load_medium_json(fetch)(url)
        # .get: a payload without "domain" is treated as unsupported rather
        # than raising KeyError.
        domain = embed_json.get("domain")
        if domain in _GITHUB_EMBED_DOMAINS:
            return get_GitHub_embed(fetch)(embed_json)
        if domain in _YOUTUBE_EMBED_DOMAINS:
            return get_YouTube_embed(embed_json)
        return ""

    return _func
def get_GitHub_embed(fetch):
    """Build a renderer that turns a gist embed into fenced code blocks.

    Args:
        fetch: Callable taking a URL and returning a response exposing
            ``.json()`` and ``.text``.

    Returns:
        A function ``_func(embed_json)`` returning one fenced Markdown code
        block per gist file, or ``""`` when there is no gist or any step
        fails (best effort: the error is printed, not raised).
    """

    def _func(embed_json):
        try:
            gist = embed_json.get("gist")
            if not gist:
                return ""

            # NOTE(review): the URL prefix is blank in this file. The
            # response is read as {"files": {name: {"language", "raw_url"}}},
            # so the GitHub gists API endpoint is assumed here - confirm.
            gist_json = fetch(
                f"https://api.github.com/gists/{gist['gistId']}"
            ).json()

            blocks = []
            for file in gist_json["files"].values():
                language = (file.get("language") or "").lower()
                code = fetch(file["raw_url"]).text
                # The previous replace(r'/\t/g', ...) was a JavaScript regex
                # literal and never matched; tabs are now really replaced.
                # NOTE(review): whitespace in this file is collapsed, so the
                # intended replacement width is unknown; 4 spaces is assumed.
                code = code.replace("\t", "    ")
                blocks.append(f"\n```{language}\n{code}\n```\n")

            # Drop the final newline, as the original did.
            return "".join(blocks)[:-1]
        except Exception as err:
            # Deliberate best effort: a failed embed must not abort the
            # conversion of the whole article.
            print("ERR: ", err)
            return ""

    return _func
# NOTE(review): the start of this pattern is missing from the file; what
# remained ("[^%]+)%3F") has an unbalanced parenthesis and cannot compile.
# The prefix is a reconstruction assuming a URL-encoded
# "youtube.com/embed/<id>?" inside iframeSrc - confirm on real data.
_YOUTUBE_ID_RE = re.compile(r"youtube\.com%2Fembed%2F([^%]+)%3F")


def get_YouTube_embed(embed_json):
    """Render a YouTube embed as an HTML iframe.

    Args:
        embed_json: Embed description; its ``iframeSrc`` string is read.

    Returns:
        A centered 560x315 iframe pointing at the video when a video id can
        be extracted from ``iframeSrc``; otherwise a plain iframe whose
        ``src`` is ``iframeSrc`` itself.
    """
    body = embed_json["iframeSrc"]
    match = _YOUTUBE_ID_RE.search(body)
    if match:
        video_id = match.group(1)
        # The previous "${video_id}" was JavaScript template syntax and put
        # a literal "$" in the output. NOTE(review): the URL prefix is blank
        # in this file; the standard embed URL is assumed.
        return (
            "<center><iframe width='560' height='315' "
            f"src ='https://www.youtube.com/embed/{video_id}' "
            "frameborder='0' allowfullscreen></iframe></center>"
        )
    return f"<iframe src='{body}' frameborder=0></iframe>"
# Paragraph types rendered by putting a fixed prefix in front of the text.
_PARAGRAPH_PREFIXES = {1: "\n", 6: "> ", 9: "\n* ", 10: "\n1. ", 13: "\n### "}


def process_paragraph(fetch):
    """Build a renderer that converts one paragraph dict into Markdown.

    Args:
        fetch: HTTP GET callable, used only for embedded media (type 11).

    Returns:
        A function ``_func(p)`` returning the Markdown for paragraph ``p``.
        ``p`` is read through ``type``, ``text``, ``markups``, ``metadata``,
        ``iframe`` and ``alignment``; it is no longer modified in place.
    """

    def _func(p):
        kind = p["type"]

        if kind == 11:
            # Embedded media: the paragraph text is not used.
            # NOTE(review): the URL prefix is blank in this file; Medium's
            # media endpoint is assumed - confirm.
            media_url = (
                f"https://medium.com/media/{p['iframe']['mediaResourceId']}"
            )
            return "\n" + get_embed(fetch)(media_url)

        text = p.get("text") or ""

        # Splice inline markers (bold, italic, links, code) into the text at
        # the character offsets where create_markups_array placed them.
        markups = p.get("markups")
        markups_array = create_markups_array(markups) if markups else []
        if markups_array:
            pieces = []
            previous_index = 0
            for index, marker in enumerate(markups_array):
                if marker is not None:
                    pieces.append(text[previous_index:index])
                    pieces.append(marker)
                    previous_index = index
            pieces.append(text[previous_index:])
            text = "".join(pieces)

        # str.replace matches literally; the earlier JavaScript-style
        # r'/\n/g' patterns never matched, so multi-line headings lost
        # their marker after the first line.
        if kind == 2:
            text = "\n# " + text.replace("\n", "\n# ")
        elif kind == 3:
            text = "\n## " + text.replace("\n", "\n## ")
        elif kind == 4:
            # Image, followed by its caption in italics when there is one.
            # MEDIUM_IMG_CDN must be defined at module level; it is not
            # visible in this part of the file.
            width = int(p["metadata"]["originalWidth"])
            src = f"{MEDIUM_IMG_CDN}{max(width * 2, 2000)}/{p['metadata']['id']}"
            image = "\n![" + text + "](" + src + ")"
            if text:
                # The opening '*' used to sit before the blank line, leaving
                # a stray asterisk after the image and no emphasis.
                image += "\n\n*" + text + "*"
            text = image
        elif kind == 7:
            # Quote.
            text = "> # " + text.replace("\n", "\n> # ")
        elif kind == 8:
            # Markdown indented code needs four leading spaces per line; the
            # single space in this file cannot produce a code block.
            text = "\n    " + text.replace("\n", "\n    ")
        elif kind == 15:
            # Caption for a section image.
            text = "*" + text + "*"

        text = _PARAGRAPH_PREFIXES.get(kind, "") + text

        if p.get("alignment", False) == 2 and kind not in (6, 7):
            text = "<center>" + text + "</center>"
        return text

    return _func
def add_markup(markups_array, open, close, start, end):
    """Record an opening and a closing marker at two offsets.

    Markers already stored at an offset are kept and the new one is
    appended. Previously the unconditional assignment that followed each
    ``+=`` overwrote them, so overlapping markups lost their markers.

    Args:
        markups_array: List indexed by character offset; each slot holds
            ``None`` or the marker text accumulated so far. Modified in
            place.
        open: Marker to add at ``start`` (name kept for keyword callers,
            although it shadows the builtin).
        close: Marker to add at ``end``.
        start: Offset of the opening marker.
        end: Offset of the closing marker.

    Returns:
        The same ``markups_array`` object.
    """
    for index, marker in ((start, open), (end, close)):
        if markups_array[index]:
            markups_array[index] += marker
        else:
            markups_array[index] = marker
    return markups_array
def create_markups_array(markups):
    """Lay out inline markers by character offset.

    Args:
        markups: Iterable of dicts with ``type``, ``start`` and ``end``
            (plus ``href`` for type 3), or ``None``/empty.

    Returns:
        ``[]`` when there are no markups; otherwise a list of length
        ``max(end) + 1`` whose slots hold ``None`` or the marker text to
        insert at that offset.
    """
    if not markups:
        return []

    markups_array = [None] * (max(m["end"] for m in markups) + 1)
    # type -> (opening marker, closing marker)
    wrappers = {
        1: ("**", "**"),    # bold
        2: ("*", "*"),      # italic
        8: ("```", "```"),  # code tag
        10: ("`", "`"),     # code tag
    }

    for m in markups:
        kind = m["type"]
        if kind == 3:
            # Anchor tag. A missing href yields an empty link target
            # instead of raising KeyError.
            opening, closing = "[", "](" + (m.get("href") or "") + ")"
        elif kind in wrappers:
            opening, closing = wrappers[kind]
        else:
            # Previously this ran for every markup and concatenated
            # str + int, raising TypeError.
            print(f"Unknown markup type {kind}", m)
            continue
        add_markup(markups_array, opening, closing, m["start"], m["end"])
    return markups_array
def process_event(event):
    """Decode the JSON ``body`` of the first entry in ``event['Records']``.

    Args:
        event: Mapping with a non-empty ``Records`` list whose first item
            has a JSON string under ``body``.

    Returns:
        The object produced by parsing that JSON string.
    """
    first_record = event['Records'][0]
    return json.loads(first_record['body'])
# Converter wired to real HTTP requests. Building it performs no I/O.
MediumToMarkdown = MediumToMarkdownBuilder(requests.get)

if __name__ == "__main__":
    # Run the conversion only when executed as a script, so importing this
    # module no longer triggers a network request.
    url = ""  # The article URL is blank in this file; set it before running.
    if not url:
        raise SystemExit("Set `url` to a Medium article URL before running.")
    medium_post = MediumToMarkdown(url)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment