Skip to content

Instantly share code, notes, and snippets.

@kyle-n
Created November 14, 2023 15:40
Show Gist options
  • Save kyle-n/ecbd81c97f2415a35356f197f9ccf965 to your computer and use it in GitHub Desktop.
Save kyle-n/ecbd81c97f2415a35356f197f9ccf965 to your computer and use it in GitHub Desktop.
Markdown <-> ProseMirror converter in Swift
//
// MarkdownTransformer.swift
// Compose for Substack
//
// Created by Kyle Nazario on 7/19/20.
//
import Foundation
import Ink
import SwiftSoup
#if os(macOS)
import AppKit
#else
import UIKit
#endif
class MarkdownTransformer {
init() { }
/// Recursively renders a `BodyNode` tree as Markdown.
///
/// The output for a node is `prefix + node text + rendered children + suffix`, where
/// prefix/suffix depend on the node type, its parent, and its marks.
///
/// - Parameters:
///   - appConfig: Supplies the user's preferred Markdown tokens (list bullet, horizontal rule, italics, bold).
///   - node: The node to render.
///   - previousSibling: The node immediately before `node` in its parent's content, if any.
///     Used so adjacent nodes sharing an em/strong mark are wrapped once instead of per node.
///   - nextSibling: The node immediately after `node` in its parent's content, if any.
///   - parent: The node whose `content` contains `node`, if any.
///   - orderedListID: 1-based position of `node` when its parent is an ordered list, otherwise `nil`.
/// - Returns: The Markdown text for `node` and all of its descendants.
static func convertToMarkdown(appConfig: AppConfig, node: BodyNode, previousSibling: BodyNode? = nil, nextSibling: BodyNode? = nil, parent: BodyNode? = nil, orderedListID: Int? = nil) -> String {
    var prefix: String
    var suffix: String
    var nodeTextContent = node.text

    switch node.type {
    case .blockquote, .doc, .text, .hardBreak:
        prefix = ""
        suffix = ""
    case .paragraph where parent?.type == .listItem:
        prefix = ""
        suffix = "\n"
    case .paragraph where parent?.type == .blockquote:
        prefix = "> "
        suffix = "\n\n"
    case .paragraph:
        prefix = ""
        suffix = "\n\n"
    case .heading:
        let level = Int(node.attrs?.level ?? 1)
        // Clamp at zero: a negative count would trap at runtime.
        prefix = String(repeating: "#", count: max(0, level)) + " "
        suffix = "\n\n"
    case .bulletList, .orderedList:
        prefix = ""
        suffix = "\n"
    case .listItem:
        if let orderedListID = orderedListID {
            prefix = String(orderedListID) + ". "
        } else {
            prefix = appConfig.listItem.rawValue + " "
        }
        suffix = ""
    case .horizontalRule:
        prefix = appConfig.horizontalRule.rawValue
        suffix = "\n\n"
    case .image:
        let alt: String = node.attrs?.alt ?? ""
        // An image node without attrs renders with an empty URL instead of crashing.
        let imageURL = node.attrs.map { "\($0.toImageURLWithQueryParams())" } ?? ""
        prefix = "![\(alt)](\(imageURL)"
        suffix = ")\n\n"
    case .button:
        let dataURL: String = node.attrs?.url ?? ""
        let buttonText: String = node.attrs?.text ?? ""
        prefix = "<button url=\"\(dataURL)\">\(buttonText)</button>"
        suffix = "\n\n"
    case .codeBlock:
        // Fenced code block.
        prefix = "```\n"
        suffix = "\n```\n\n"
    case .twitter:
        prefix = ""
        nodeTextContent = node.attrs?.url ?? "Could not load Twitter embed"
        suffix = "\n\n"
    case .youtube:
        prefix = ""
        if let videoId = node.attrs?.videoId {
            nodeTextContent = "https://www.youtube.com/watch?v=\(videoId)"
        } else {
            nodeTextContent = "Could not load YouTube embed"
        }
        suffix = "\n\n"
    case .vimeo:
        prefix = ""
        if let videoId = node.attrs?.videoId {
            nodeTextContent = "https://vimeo.com/\(videoId)"
        } else {
            nodeTextContent = "Could not load Vimeo embed"
        }
        suffix = "\n\n"
    case .spotify:
        prefix = ""
        nodeTextContent = node.attrs?.url ?? "Could not load Spotify embed"
        suffix = "\n\n"
    case .soundcloud:
        prefix = ""
        nodeTextContent = node.attrs?.url ?? "Could not load Soundcloud embed"
        suffix = "\n\n"
    }

    // Wraps the node in `matchingTag`, but only opens the tag when the previous sibling does not
    // carry the same mark and only closes it when the next sibling does not.
    func processContinuousTag(tagType: BodyNodeMark.MarkType, matchingTag: String) {
        let precededBySameMark = previousSibling?.marks?.contains(where: { $0.type == tagType }) ?? false
        if !precededBySameMark {
            prefix = matchingTag + prefix
        }
        let followedBySameMark = nextSibling?.marks?.contains(where: { $0.type == tagType }) ?? false
        if !followedBySameMark {
            suffix += matchingTag
        }
    }

    for mark in node.marks ?? [] {
        switch mark.type {
        case .em:
            processContinuousTag(tagType: .em, matchingTag: appConfig.italics.rawValue)
        case .strong:
            processContinuousTag(tagType: .strong, matchingTag: appConfig.bold.rawValue)
        case .link:
            prefix += "["
            suffix = "](\(mark.attrs?.href ?? ""))" + suffix
        case .code:
            prefix += "`"
            suffix += "`"
        }
    }

    let computedTextOfChildContent: String
    if let content = node.content {
        computedTextOfChildContent = content.enumerated().map { index, childNode in
            MarkdownTransformer.convertToMarkdown(
                appConfig: appConfig,
                node: childNode,
                previousSibling: index > 0 ? content[index - 1] : nil,
                nextSibling: index + 1 < content.count ? content[index + 1] : nil,
                parent: node,
                // Ordered list items are numbered by position, starting at 1.
                orderedListID: node.type == .orderedList ? index + 1 : nil
            )
        }
        .joined()
    } else {
        computedTextOfChildContent = ""
    }

    return prefix + (nodeTextContent ?? "") + computedTextOfChildContent + suffix
}
/// Decodes a JSON-encoded `BodyNode` and renders it as Markdown.
///
/// - Parameters:
///   - appConfig: Passed through to the node-based `convertToMarkdown`.
///   - encodedNode: JSON text describing a `BodyNode` tree.
/// - Returns: The Markdown rendering of the decoded tree.
/// - Note: Malformed JSON is treated as a programmer error and traps, because the signature is
///   non-throwing and callers expect a `String`.
static func convertToMarkdown(appConfig: AppConfig, encodedNode: String) -> String {
    let node: BodyNode
    do {
        // `Data(_:)` over the UTF-8 view cannot fail, unlike `data(using:)`.
        node = try JSONDecoder().decode(BodyNode.self, from: Data(encodedNode.utf8))
    } catch {
        fatalError("MarkdownTransformer: could not decode BodyNode JSON: \(error)")
    }
    let markdown = convertToMarkdown(appConfig: appConfig, node: node)
    // Keep the dump for debugging only; release builds should not log document contents.
    #if DEBUG
    print(markdown)
    #endif
    return markdown
}
// MARK: - Embed detection
// Patterns used by `getParagraphType` to recognize a paragraph whose whole text is an embed URL.
// Every pattern is anchored with ^…$, so surrounding text prevents a match.
private static let vimeoEmbedRegex = try! NSRegularExpression(pattern: "^https://(www\\.)?vimeo\\.com/\\d+$", options: [])
// YouTube video IDs can contain "-" in addition to word characters, so "-" is allowed explicitly.
private static let youtubeEmbedRegex = try! NSRegularExpression(pattern: "^https://(www\\.)?youtube\\.com/watch\\?v=[\\w-]+$", options: [])
private static let twitterEmbedRegex = try! NSRegularExpression(pattern: "^https://(www\\.)?twitter\\.com/.*/status/[^\\s]*$", options: [])
// Requires a subdomain before "spotify.com" (for example "open.").
private static let spotifyEmbedRegex = try! NSRegularExpression(pattern: "^https://(www\\.)?[\\w.]*\\.spotify\\.com/[^\\s]*$", options: [])
private static let soundcloudEmbedRegex = try! NSRegularExpression(pattern: "^https://(www\\.)?[\\w.]*soundcloud\\.com/[^\\s]*$", options: [])
/// Classifies a `<p>` element as an embed type when its trimmed text is exactly one embed URL.
///
/// - Parameter elem: The paragraph element to inspect.
/// - Returns: `.vimeo`, `.youtube`, `.twitter`, `.spotify` or `.soundcloud` when the matching
///   pattern matches (checked in that order), otherwise `.paragraph`. Also returns `.paragraph`
///   when the element's text cannot be read.
private static func getParagraphType(elem: Element) -> BodyNode.BodyNodeType {
    guard let rawText = try? elem.text() else {
        return .paragraph
    }
    let text = rawText.trimmingCharacters(in: .whitespaces)
    // NSRegularExpression works in UTF-16 units; `text.count` (Characters) can be shorter than
    // the UTF-16 length and would silently truncate the searched range.
    let all = NSRange(text.startIndex..<text.endIndex, in: text)
    let embedPatterns: [(NSRegularExpression, BodyNode.BodyNodeType)] = [
        (MarkdownTransformer.vimeoEmbedRegex, .vimeo),
        (MarkdownTransformer.youtubeEmbedRegex, .youtube),
        (MarkdownTransformer.twitterEmbedRegex, .twitter),
        (MarkdownTransformer.spotifyEmbedRegex, .spotify),
        (MarkdownTransformer.soundcloudEmbedRegex, .soundcloud),
    ]
    for (regex, embedType) in embedPatterns where regex.firstMatch(in: text, options: [], range: all) != nil {
        return embedType
    }
    return .paragraph
}
/// Converts Markdown text into a JSON-encoded `BodyNode` tree.
///
/// Steps: render the Markdown to HTML with `MarkdownParser`, parse that HTML with SwiftSoup,
/// walk the `<body>` element recursively to build `BodyNode`s, then JSON-encode the root node.
///
/// - Parameters:
///   - account: Forwarded to `EmbedUplink.getEmbedDetails` for twitter/spotify/soundcloud embeds.
///   - markdown: The Markdown source to convert.
/// - Returns: The JSON text of the root (`.doc`) node.
/// - Throws: Only what `SwiftSoup.parse` throws. Every other failure inside this function uses
///   `try!` or a force unwrap and therefore traps instead of throwing.
static func convertToBodyNode(account: Account, markdown: String) throws -> String {
// Maps one DOM node to zero or more BodyNodes. Most elements produce exactly one node;
// <strong>/<em> produce their (marked) children directly, without a wrapper node.
func htmlToBodyNode(_ domNode: Node) -> [BodyNode] {
// Default result: a bare text node, refined below according to the tag name.
var node = BodyNode(
type: .text,
content: nil,
text: nil,
marks: nil,
attrs: nil
)
var nodes: Array<BodyNode> = []
// Set to false for tags that contribute only their children (strong/em).
var addNodeToSetAtEnd = true
if let elem = domNode as? Element {
// NOTE(review): this regex is rebuilt for every element visited; it could be a static constant.
let headingRegex = try? NSRegularExpression(pattern: "^h[0-9]$", options: [])
let range = NSRange(location: 0, length: elem.tagName().count)
// When set (for <li>), the element's children are placed inside this node instead of
// directly inside `node`.
var immediateChild: BodyNode? = nil
// Fills `node` from an <embed> element's data-* attributes.
// NOTE(review): traps if data-type is not a known BodyNodeType raw value, or if the
// retweet/like counts are missing or not integers.
func processEmbed(_ elem: Element) {
node.type = BodyNode.BodyNodeType(rawValue: (try! elem.attr("data-type")))!
if node.type == .twitter {
node.attrs = node.attrs ?? BodyNodeAttrs()
node.attrs!.url = try! elem.attr("data-url")
node.attrs!.fullText = try! elem.attr("data-fullText")
node.attrs!.username = try! elem.attr("data-username")
node.attrs!.name = try! elem.attr("data-name")
node.attrs!.date = try! elem.attr("data-date")
node.attrs!.photos = []
node.attrs!.quotedTweet = nil
node.attrs!.retweetCount = Int(try! elem.attr("data-retweetCount"))!
node.attrs!.likeCount = Int(try! elem.attr("data-likeCount"))!
node.attrs!.expandedUrl = Empty()
}
}
switch elem.tagName() {
case "body":
node.type = .doc
case "p":
// A paragraph that is just an embed URL becomes that embed type.
node.type = MarkdownTransformer.getParagraphType(elem: elem)
case "strong":
addNodeToSetAtEnd = false
case "em":
addNodeToSetAtEnd = false
case let headerTagName where headingRegex?.firstMatch(in: elem.tagName(), options: [], range: range) != nil:
node.type = .heading
// The heading level is the digit at the end of the tag name ("h2" -> 2).
let level = Int(String(headerTagName.last!))
node.attrs = BodyNodeAttrs(level: level)
// The heading's content starts with a hard break; the element's children are appended after it below.
node.content = [
BodyNode(type: .hardBreak),
]
case "a":
// Links stay `.text` nodes carrying a link mark; the text is the element's full text.
node.marks = [
BodyNodeMark(type: .link, attrs: BodyNodeAttrs(href: try! elem.attr("href"), title: nil))
]
node.text = try! elem.text()
case "ul":
node.type = .bulletList
node.attrs = BodyNodeAttrs(tight: false)
case "ol":
node.type = .orderedList
node.attrs = BodyNodeAttrs(order: 1, tight: false)
case "li":
// List item children are wrapped in a paragraph (see `immediateChild` handling below).
node.type = .listItem
immediateChild = BodyNode(type: .paragraph)
case "hr":
node.type = .horizontalRule
case "img":
node.type = .image
node.attrs = BodyNodeAttrs(src: try! elem.attr("src"), alt: try! elem.attr("alt"))
node.attrs!.setAttributesFromURLParams(URL(string: try! elem.attr("src")))
case "button":
node.type = .button
let url = try! elem.attr("url")
node.attrs = BodyNodeAttrs(url: url, text: try! elem.text())
case "pre": // code block
// The whole text of the <pre> becomes a single text child; parsed children are discarded below.
node.type = .codeBlock
node.attrs = BodyNodeAttrs(params: "")
node.content = [
BodyNode(type: .text, text: try! elem.text())
]
case "code":
// Inline code stays a `.text` node carrying a code mark.
node.marks = [BodyNodeMark(type: .code)]
node.text = try! elem.text()
case "blockquote":
node.type = .blockquote
case "embed":
processEmbed(elem)
default:
print("no match for", elem.tagName())
}
// <a> and <code> already captured their text above, so only their element children are
// visited. NOTE(review): presumably `children()` excludes text nodes while `getChildNodes()`
// includes them — confirm against SwiftSoup.
let childNodes: Array<Node> = (elem.tagName() == "a" || elem.tagName() == "code") ? elem.children().array() : elem.getChildNodes()
var childContent: [BodyNode] = childNodes.flatMap { htmlToBodyNode($0) }
if elem.tagName() == "pre" {
childContent = []
}
// bold/italic tags should return arrays of nodes with no parent container:
// each child gets the strong/em mark appended and is returned directly.
if elem.tagName() == "strong" {
childContent = childContent.map { node in
var strongedNode = node
if strongedNode.marks == nil {
strongedNode.marks = []
}
strongedNode.marks!.append(BodyNodeMark(type: .strong, attrs: nil))
return strongedNode
}
nodes = nodes + childContent
}
if elem.tagName() == "em" {
childContent = childContent.map { node in
var strongedNode = node
if strongedNode.marks == nil {
strongedNode.marks = []
}
strongedNode.marks!.append(BodyNodeMark(type: .em, attrs: nil))
return strongedNode
}
nodes = nodes + childContent
}
// embeds should add their specific attrs
if node.type == .vimeo {
if node.attrs == nil {
node.attrs = BodyNodeAttrs()
}
// The video id is whatever follows ".com/" in the paragraph text. The index [1] relies on
// the text containing ".com/", which the vimeo pattern in getParagraphType requires.
let videoId: String = (try! elem.text())
.trimmingCharacters(in: .whitespaces)
.components(separatedBy: ".com/")[1]
node.attrs!.videoId = videoId
}
if node.type == .youtube {
if node.attrs == nil {
node.attrs = BodyNodeAttrs()
}
let url = URL(string: (try! elem.text().trimmingCharacters(in: .whitespaces)))
if let url = url {
// Unlike the other embeds, a YouTube embed is emitted as a paragraph wrapping a single
// youtube node whose id comes from the "v" query parameter.
let videoId: String = url.queryParameters?["v"] ?? ""
node.type = .paragraph
node.content = [BodyNode(type: .youtube, attrs: BodyNodeAttrs(videoId: videoId))]
childContent = []
}
}
// Embeds whose attrs come from `EmbedUplink.getEmbedDetails` rather than from the URL alone.
// NOTE(review): this is called inline during the tree walk; if it performs network I/O it
// blocks the caller — confirm.
let embedsRequiringAPIData: Array<BodyNode.BodyNodeType> = [.twitter, .spotify, .soundcloud]
if embedsRequiringAPIData.contains(node.type) == true {
let embedURL = (try! elem.text()).trimmingCharacters(in: .whitespaces)
node.attrs = EmbedUplink.getEmbedDetails(account: account, embedType: node.type, embedURL: embedURL)
node.text = nil
}
// Attach children: inside the wrapper paragraph for <li>, otherwise appended to any content
// already set above (for example the heading's hard break).
if var immediateChild = immediateChild {
immediateChild.content = (immediateChild.content ?? []) + childContent
node.content = [immediateChild]
} else {
node.content = (node.content ?? []) + childContent
}
} else if let textNode = domNode as? TextNode {
// Plain text: keep the default `.text` node and copy the text.
node.text = textNode.text()
}
// Leaf node types carry no content.
if node.type == .button || node.type == .image {
node.content = nil
}
// NOTE(review): `.spotify` is not in this list, so a spotify node keeps its child content
// (its text was already cleared above) — confirm this is intentional.
if node.type == .vimeo || node.type == .twitter || node.type == .soundcloud {
node.content = nil
node.text = nil
}
if addNodeToSetAtEnd == true {
nodes.append(node)
}
return nodes
}
let htmlMarkup: String = MarkdownParser().html(from: markdown)
let html: Document = try SwiftSoup.parse(htmlMarkup)
// The <body> element maps to exactly one `.doc` node, hence the two [0] subscripts.
let body: BodyNode = htmlToBodyNode(try! html.select("body")[0])[0]
let encodedBody = try! JSONEncoder().encode(body)
return String(data: encodedBody, encoding: .utf8)!
}
// MARK: - Editor highlighting constants
// Font sizes used by `toAttributedString` when the editor font is not San Francisco:
// `fontSize` for body text, `sansSerifFontSize` for inline code.
private static let fontSize: CGFloat = 20
private static let sansSerifFontSize: CGFloat = 16
// Patterns used by `toAttributedString` to find Markdown constructs in the raw editor text.
static let inlineCodeRegex = try! NSRegularExpression(pattern: "`[^`]*`", options: [])
static let codeBlockRegex = try! NSRegularExpression(pattern: "(`){3}((?!\\1).)+\\1{3}", options: [.dotMatchesLineSeparators])
static let headingRegex = try! NSRegularExpression(pattern: "^#{1,6}\\s.*$", options: [.anchorsMatchLines])
static let linkOrImageRegex = try! NSRegularExpression(pattern: "!?\\[([^\\[\\]]*)\\]\\((.*?)\\)", options: [])
static let boldRegex = try! NSRegularExpression(pattern: "((\\*|_){2})((?!\\1).)+\\1", options: [])
// NOTE(review): the trailing lookahead rejects "*" while the leading lookbehind rejects "_";
// possibly meant to be (?!_) to mirror the asterisk variant below — confirm before changing.
static let underscoreEmphasisRegex = try! NSRegularExpression(pattern: "(?<!_)_[^_]+_(?!\\*)", options: [])
static let asteriskEmphasisRegex = try! NSRegularExpression(pattern: "(?<!\\*)(\\*)((?!\\1).)+\\1(?!\\*)", options: [])
static let boldEmphasisAsteriskRegex = try! NSRegularExpression(pattern: "(\\*){3}((?!\\1).)+\\1{3}", options: [])
static let blockquoteRegex = try! NSRegularExpression(pattern: "^>.*", options: [.anchorsMatchLines])
static let horizontalRuleRegex = try! NSRegularExpression(pattern: "\n\n(-{3}|\\*{3})\n", options: [])
static let unorderedListRegex = try! NSRegularExpression(pattern: "^(\\-|\\*)\\s", options: [.anchorsMatchLines])
// At least one digit is required, so a line starting with ". " is not treated as a list marker.
static let orderedListRegex = try! NSRegularExpression(pattern: "^\\d+\\.\\s", options: [.anchorsMatchLines])
static let buttonRegex = try! NSRegularExpression(pattern: "<\\s*button[^>]*>(.*?)<\\s*/\\s*button>", options: [])
// Matches a line consisting solely of an https URL on a ".com" host.
static let embedRegex = try! NSRegularExpression(pattern: "^https://[\\w.]+\\.com/[^\\s]*$", options: [.anchorsMatchLines])
/// Builds a syntax-highlighted attributed string for raw Markdown editor text.
///
/// Base attributes (label color, editor font, paragraph style) are applied to the whole string,
/// then each Markdown construct found by the static regexes is restyled. Later passes override
/// earlier ones where ranges overlap, so the order of the passes below is significant.
///
/// - Parameters:
///   - text: The raw Markdown text shown in the editor.
///   - appConfig: Supplies the editor font choice.
/// - Returns: An attributed copy of `text`; the characters themselves are unchanged.
static func toAttributedString(_ text: String, appConfig: AppConfig) -> NSAttributedString {
    let attributedString = NSMutableAttributedString(string: text)
    // Attributed strings and NSRegularExpression index by UTF-16 unit. `text.count` counts
    // Characters and is shorter for emoji/combining sequences, which left the tail unstyled.
    let all = NSRange(location: 0, length: attributedString.length)

    let editorFont: UIFont
    let editorFontSize: CGFloat
    let codeFontSize: CGFloat
    let paragraphStyle = NSMutableParagraphStyle()
    if appConfig.editorFont == .sanFrancisco {
        editorFontSize = 20
        codeFontSize = 18
        editorFont = UIFont.systemFont(ofSize: editorFontSize)
        paragraphStyle.lineSpacing = 1.5
    } else {
        editorFontSize = MarkdownTransformer.fontSize
        codeFontSize = MarkdownTransformer.sansSerifFontSize
        // Fall back to the system font rather than crashing if the named font is unavailable.
        editorFont = UIFont(name: appConfig.editorFont.rawValue, size: editorFontSize)
            ?? UIFont.systemFont(ofSize: editorFontSize)
    }

    attributedString.addAttribute(.foregroundColor, value: UIColor.label, range: all)
    attributedString.addAttribute(.font, value: editorFont, range: all)
    attributedString.addAttribute(.paragraphStyle, value: paragraphStyle, range: all)

    // Ranges of every match of `regex` in the full text.
    func matchRanges(of regex: NSRegularExpression) -> [NSRange] {
        return regex.matches(in: text, options: [], range: all).map { $0.range }
    }

    // Sets one attribute on every match of `regex`.
    func setAttribute(_ key: NSAttributedString.Key, to value: Any, matching regex: NSRegularExpression) {
        for range in matchRanges(of: regex) {
            attributedString.addAttribute(key, value: value, range: range)
        }
    }

    // Replaces the font of every match of `regex` with a transformed version of the font
    // currently found in that match. When a match spans several font runs, the last run's
    // font is the one transformed and applied to the whole match.
    func restyleFont(matching regex: NSRegularExpression, _ transform: (UIFont) -> UIFont) {
        for range in matchRanges(of: regex) {
            var restyled = UIFont.systemFont(ofSize: 0)
            attributedString.enumerateAttributes(in: range, options: []) { attributes, _, _ in
                if let font = attributes[.font] as? UIFont {
                    restyled = transform(font)
                }
            }
            attributedString.addAttribute(.font, value: restyled, range: range)
        }
    }

    setAttribute(.font, to: UIFont.monospacedSystemFont(ofSize: codeFontSize, weight: .thin), matching: MarkdownTransformer.inlineCodeRegex)
    setAttribute(.foregroundColor, to: UIColor.lightGray, matching: MarkdownTransformer.horizontalRuleRegex)

    restyleFont(matching: MarkdownTransformer.headingRegex) {
        $0.with(.traitBold, .traitExpanded).withSize(editorFontSize * 1.25)
    }
    setAttribute(.kern, to: 0.5, matching: MarkdownTransformer.headingRegex)

    restyleFont(matching: MarkdownTransformer.boldRegex) { $0.with(.traitBold) }
    restyleFont(matching: MarkdownTransformer.asteriskEmphasisRegex) { $0.with(.traitItalic) }
    restyleFont(matching: MarkdownTransformer.underscoreEmphasisRegex) { $0.with(.traitItalic) }
    restyleFont(matching: MarkdownTransformer.boldEmphasisAsteriskRegex) { $0.with(.traitItalic, .traitBold) }

    setAttribute(.foregroundColor, to: UIColor.lightGray, matching: MarkdownTransformer.unorderedListRegex)
    setAttribute(.foregroundColor, to: UIColor.lightGray, matching: MarkdownTransformer.orderedListRegex)
    setAttribute(.underlineStyle, to: NSUnderlineStyle.single.rawValue, matching: MarkdownTransformer.linkOrImageRegex)
    setAttribute(.backgroundColor, to: UIColor.secondarySystemBackground, matching: MarkdownTransformer.codeBlockRegex)
    setAttribute(.foregroundColor, to: UIColor.lightGray, matching: MarkdownTransformer.buttonRegex)
    setAttribute(.backgroundColor, to: UIColor.secondarySystemBackground, matching: MarkdownTransformer.blockquoteRegex)
    setAttribute(.underlineStyle, to: NSUnderlineStyle.single.rawValue, matching: MarkdownTransformer.embedRegex)

    return attributedString
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment