Skip to content

Instantly share code, notes, and snippets.

Last active September 21, 2022 21:18
Show Gist options
  • Save colinfwren/3e35388ec13d03e6811f3cb793ee31c0 to your computer and use it in GitHub Desktop.
Save colinfwren/3e35388ec13d03e6811f3cb793ee31c0 to your computer and use it in GitHub Desktop.
A script to convert a Medium export into Markdown usable by Gatsby.
import fetch from 'node-fetch'
import {read} from 'to-vfile'
import {unified} from 'unified'
import rehypeParse from 'rehype-parse'
import rehypeRemark from 'rehype-remark'
import remarkStringify from 'remark-stringify'
import { writeFile, promises as fs } from 'fs'
import { selectAll } from "hast-util-select";
import {toHtml} from "hast-util-to-html";
import slugify from "slugify";
import frontmatter from 'remark-frontmatter'
import {visit} from "unist-util-visit";
import path from 'path'
/**
 * Download an image and store it on disk.
 *
 * @param {string} url - Address of the image to fetch.
 * @param {string} pathName - Destination file path for the image bytes.
 * @returns {Promise<string>} `pathName` once the file has been written, or
 *   `url` when the image could not be fetched or saved.
 */
async function downloadImage(url, pathName) {
  let imageData
  try {
    const imageResp = await fetch(url)
    // An error page must not be saved as if it were the image.
    if (!imageResp.ok) {
      throw new Error(`Unexpected response status ${imageResp.status}`)
    }
    imageData = await imageResp.arrayBuffer()
  } catch (error) {
    console.error('Failed to fetch', url, error)
    return url
  }

  try {
    // Use the promise-based API: awaiting the callback flavour of writeFile
    // resolves immediately, before the file is actually on disk.
    await fs.writeFile(pathName, Buffer.from(imageData))
    console.log(`Wrote ${pathName}`)
    return pathName
  } catch (error) {
    console.error(`Failed to write ${pathName}`, error)
    return url
  }
}
/**
 * Fetch the raw source of a gist from the URL of its embed script.
 *
 * @param {string} url - Embed script URL; everything from the first ".js" on
 *   is dropped and "/raw" is appended to build the request URL.
 * @returns {Promise<string|false>} The response text, or `false` when the
 *   request fails or answers with a non-success status.
 */
async function downloadGistCode(url) {
  try {
    const rawUrl = `${url.split('.js')[0]}/raw`
    const codeResp = await fetch(rawUrl)
    // Without this check an error page would be returned as if it were code.
    if (!codeResp.ok) {
      throw new Error(`Unexpected response status ${codeResp.status}`)
    }
    return await codeResp.text()
  } catch (error) {
    console.error('Failed to fetch', url, error)
    return false
  }
}
/**
 * Derive a local filename from an image URL.
 *
 * @param {string} src - Image URL or path.
 * @returns {string} The last path segment, without query string or fragment;
 *   ".jpg" is appended when that segment contains no dot.
 */
function getFilename(src) {
  // Drop "?query" and "#fragment" so they neither end up in the filename nor
  // make an extension-less name look as if it had an extension.
  const [cleanSrc] = src.split(/[?#]/)
  const filename = cleanSrc.split('/').pop()
  return filename.includes('.') ? filename : `${filename}.jpg`
}
/**
 * Plugin factory: download every `<img>` in the tree into `file.outputFolder`
 * and point the image at the downloaded file.
 *
 * NOTE(review): parts of this block were missing from the text it was restored
 * from; the `nodes.map(async ...)` wrapper and the use of `node.properties.src`
 * are reconstructed - confirm against the original script.
 *
 * @returns {Function} Async transformer `(tree, file)` that resolves to `tree`.
 */
function rehypeDownloadImages() {
  return async (tree, file) => {
    const nodes = selectAll('img', tree)
    await Promise.all(nodes.map(async (node) => {
      const src = node.properties && node.properties.src
      // An <img> without a source has nothing to download.
      if (!src) {
        return node
      }
      const filename = getFilename(src)
      const outputPath = path.join(file.outputFolder, filename)
      const savedTo = await downloadImage(src, outputPath)
      // downloadImage returns the URL instead of the path when the fetch
      // failed; keep the remote source then rather than a dangling filename.
      if (savedTo === outputPath) {
        node.properties.src = filename
      }
      return node
    }))
    return tree
  }
}
/**
 * Plugin factory: replace gist embed `<script>` elements with a text node
 * holding the gist source wrapped in a fenced code block.
 *
 * NOTE(review): parts of this block were missing from the text it was restored
 * from; the `nodes.map(async ...)` wrapper and the use of `node.properties.src`
 * are reconstructed - confirm against the original script.
 *
 * @returns {Function} Async transformer `(tree)` that resolves to `tree`.
 */
function rehypeInlineGistScript() {
  return async (tree) => {
    const nodes = selectAll('script', tree)
    await Promise.all(nodes.map(async (node) => {
      const src = node.properties && node.properties.src
      // Inline scripts have no src; only gist embeds are rewritten.
      if (typeof src !== 'string' || !src.includes('gist')) {
        return node
      }
      const code = await downloadGistCode(src)
      // downloadGistCode signals failure with `false`; leave the node alone
      // instead of inlining the word "false" as code.
      if (code === false) {
        return node
      }
      node.properties = {}
      node.type = 'text'
      node.value = '\n```\n' + code + '\n```\n'
      return node
    }))
    return tree
  }
}
/**
 * Plugin factory: collect title, slug, date and excerpt from the parsed HTML
 * and store them on `file.frontmatter` for later steps.
 *
 * NOTE(review): several property names were missing from the text this block
 * was restored from. The section marker (`dataField` / `name`) and the
 * timestamp (`dateTime` / `datetime`) are reconstructed, so both spellings are
 * accepted - confirm against the original script and the export format.
 *
 * @returns {Function} Async transformer `(tree, file)` that resolves to `tree`.
 * @throws {Error} When the top level has no `<title>` or `<article>` element.
 */
function gatherFrontMatterData() {
  const hasField = (node, field) => Boolean(node.properties) &&
    (node.properties.dataField === field || node.properties.name === field)

  return async (tree, file) => {
    const titleNode = tree.children.find((x) => x.tagName === 'title')
    const article = tree.children.find((x) => x.tagName === 'article')
    if (!titleNode || !article) {
      throw new Error('Expected <title> and <article> elements at the top level of the document')
    }

    const title = titleNode.children[0].value
    const articleContent = article.children
    const subtitle = articleContent.find((x) => hasField(x, 'subtitle'))
    const footer = articleContent.find((x) => x.tagName === 'footer')
    const footerContent = footer ? footer.children : []

    // Take the date part of the <time> found inside a footer link; when there
    // are several, the last one wins.
    let date = ''
    for (const node of footerContent) {
      const links = node.children ? node.children.filter((x) => x.tagName === 'a') : []
      for (const link of links) {
        const time = link.children.find((x) => x.tagName === 'time')
        const stamp = time && time.properties &&
          (time.properties.dateTime || time.properties.datetime)
        if (stamp) {
          date = String(stamp).split('T')[0]
        }
      }
    }

    const hasExcerpt = Boolean(subtitle) && subtitle.children.length > 0
    file.frontmatter = {
      title,
      slug: slugify(title, { lower: true }),
      date,
      excerpt: hasExcerpt ? (subtitle.children[0].value || '') : ''
    }
    return tree
  }
}
/**
 * Plugin factory: prepend a YAML front matter node built from
 * `file.frontmatter` (slug, date, title, excerpt) to the tree.
 *
 * NOTE(review): the statement that inserts the node was missing from the text
 * this block was restored from; `tree.children.unshift` is assumed - confirm.
 * The last key was spelled `except`; it is written as `excerpt` here to match
 * the `file.frontmatter.excerpt` value it carries.
 *
 * @returns {Function} Transformer `(tree, file)`; mutates `tree` in place.
 */
function setFrontMatter() {
  // JSON string syntax doubles as a YAML double-quoted scalar, so quotes and
  // backslashes inside a value cannot break the front matter.
  const quote = (value) => JSON.stringify(String(value == null ? '' : value))

  return (tree, file) => {
    const { slug, date, title, excerpt } = file.frontmatter
    tree.children.unshift({
      type: 'yaml',
      value: [
        `slug: ${quote(slug)}`,
        `date: ${quote(date)}`,
        `title: ${quote(title)}`,
        `excerpt: ${quote(String(excerpt == null ? '' : excerpt).trim())}`
      ].join('\n')
    })
  }
}
/**
 * Plugin factory: keep only the body section of the `<article>` and drop
 * `<hr>` elements with class "section-divider" and `<h3>` elements with class
 * "graf--title".
 *
 * NOTE(review): the property checks were missing from the text this block was
 * restored from. The body marker (`dataField` / `name`) and the use of
 * `properties.className` are reconstructed - confirm against the original.
 *
 * @returns {Function} Transformer `(tree)`; mutates `tree` in place.
 */
function removeMediumExtras() {
  const hasClass = (node, className) => {
    const classes = node.properties && node.properties.className
    return Array.isArray(classes) && classes.includes(className)
  }

  const isBodySection = (node) => Boolean(node.properties) &&
    (node.properties.dataField === 'body' || node.properties.name === 'body')

  const removeMatching = (tree, tagName, className) => {
    visit(tree, { tagName }, (node, index, parent) => {
      if (!parent || typeof index !== 'number' || !hasClass(node, className)) {
        return undefined
      }
      parent.children.splice(index, 1)
      // Continue at the same index: the next sibling moved into this slot and
      // would otherwise be skipped by the traversal.
      return index
    })
  }

  return (tree) => {
    const article = tree.children.find((x) => x.tagName === 'article')
    if (article) {
      article.children = article.children.filter(isBodySection)
    }
    removeMatching(tree, 'hr', 'section-divider')
    removeMatching(tree, 'h3', 'graf--title')
  }
}
/**
 * Plugin factory: create the folder for a post and record it on the file.
 *
 * The folder is `<options.outputFolder>/<date>-<slug>`, with both parts taken
 * from `file.frontmatter`; its path is stored on `file.outputFolder`.
 *
 * NOTE(review): the first interpolation of the folder name was missing from
 * the text this block was restored from; `file.frontmatter.date` is assumed.
 *
 * @param {{outputFolder: string}} options - Base folder for all posts.
 * @returns {Function} Async transformer `(tree, file)`.
 */
function createDirectoryForPost(options) {
  return async (tree, file) => {
    const folderName = `${file.frontmatter.date}-${file.frontmatter.slug}`
    const fullPath = path.join(options.outputFolder, folderName)
    file.outputFolder = fullPath
    try {
      // `fs` is the promise API, which takes no callback. `recursive: true`
      // makes an already existing folder a success instead of an EEXIST error.
      await fs.mkdir(fullPath, { recursive: true })
    } catch (error) {
      console.error(`Failed to create output folder at ${fullPath}`, error)
    }
  }
}
// Text node holding a single newline; used as the initial entry when the
// children of a cleansed <figure> are assembled.
const paddingNode = {
  type: 'text',
  value: '\n',
}
/**
 * Extract the plain text of a caption node.
 *
 * Text is collected from every descendant, so links, emphasis and other
 * inline elements contribute their text; runs of whitespace collapse to one
 * space and the result is trimmed.
 *
 * @param {{children?: Array<object>}} node - Caption element.
 * @returns {string} Caption text, or '' when the node holds no text.
 */
function getCaption(node) {
  const collectText = (current) => {
    if (current.type === 'text') {
      return current.value || ''
    }
    return (current.children || []).map(collectText).join('')
  }
  return collectText(node).replace(/\s+/g, ' ').trim()
}
// Convert one HTML file to Markdown through a unified pipeline and write the
// result to an index.mdx file in a per-post folder below outputFolder.
//
// NOTE(review): this block appears to have lost text during extraction.
// Closing braces are missing, several expressions are incomplete (the value
// assigned to cleansedChildren, the alt value, the first interpolation of the
// output path) and further pipeline steps may be missing, so it does not
// parse as shown. Restore it from the original script before relying on it;
// the comments below describe only what is still visible.
//
// filePath: path of the HTML file, loaded with read from to-vfile.
// outputFolder: base folder; also handed to createDirectoryForPost.
async function convertHtmlToMarkdown(filePath, outputFolder ) {
const tree = await unified()
// Parse the input as an HTML fragment.
.use(rehypeParse, {fragment: true})
.use(createDirectoryForPost, { outputFolder })
.use(rehypeRemark, {
handlers: { // defines how to handle specific HTML tags
// Custom handling for figure elements: the result is emitted as an html
// node whose value is the figure serialised again with toHtml.
figure(h, node) {
const captionNode = node.children.find(child => child.tagName === 'figcaption')
const caption = captionNode ? getCaption(captionNode) : ''
// NOTE(review): the expression before the arrow is missing; presumably a map
// over node.children - confirm.
const cleansedChildren = => {
// Add the figcaption text to the img so when converted to Markdown it will use that
if (child.tagName === 'img') {
return {
properties: {
// NOTE(review): the left operand is missing; presumably the existing alt
// text of the image, with the caption as fallback - confirm.
alt: || caption
if (child.tagName === 'figcaption') {
return {
properties: {}
return child
const cleansedNode = {
properties: {},
// The accumulator starts with a single padding node.
// NOTE(review): the reducer body looks incomplete (it only returns acc).
children: cleansedChildren.reduce((acc, child) => {
return acc
}, [paddingNode])
return h(cleansedNode, 'html', toHtml(cleansedNode, { closeSelfClosing: true }))
.process(await read(filePath))
// Despite its name, tree holds the result of process(); its frontmatter
// property is read to build the output path.
const fullPath = path.join(outputFolder, `${}-${tree.frontmatter.slug}/index.mdx`)
const fileContent = String(tree)
// NOTE(review): the pattern below looks damaged (its alternatives are only
// whitespace or empty); presumably it originally listed character entities
// to be replaced by a space - confirm before use.
const cleanedFileContent = fileContent.replace(/ | |/g, ' ')
// NOTE(review): writeFile is the callback-style import from fs, so the await
// does not wait for the write; failures are only reported by the callback.
await writeFile(fullPath, cleanedFileContent, (err) => {
if (err) {
console.error(`Failed to write file at ${fullPath}`, err)
/**
 * Convert every ".html" file of a directory into a post below the
 * "gatsby-posts" folder of the current working directory.
 *
 * NOTE(review): the `htmlFiles.map(async ...)` wrapper was missing from the
 * text this block was restored from and is reconstructed - confirm.
 *
 * @param {string} directory - Directory to scan, relative to the current
 *   working directory.
 * @returns {Promise<void>} Resolves when all files were attempted; failures
 *   are logged, not thrown.
 */
async function processFilesInDirectory(directory) {
  try {
    const cwd = path.resolve()
    const fullPath = path.join(cwd, directory)
    const outputPath = path.join(cwd, 'gatsby-posts')
    const files = await fs.readdir(fullPath)
    // `recursive: true` keeps a second run from aborting with EEXIST when the
    // output folder is already there.
    await fs.mkdir(outputPath, { recursive: true })
    const htmlFiles = files.filter((file) => path.extname(file) === '.html')
    await Promise.all(htmlFiles.map(async (file) => {
      const filePath = path.join(fullPath, file)
      try {
        await convertHtmlToMarkdown(filePath, outputPath)
      } catch (error) {
        // Report the failing file and let the remaining conversions finish.
        console.error(`Failed to convert ${filePath}`, error)
      }
    }))
  } catch (error) {
    console.error('Failed to process files in directory', error)
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment