Created
November 14, 2023 15:40
-
-
Save kyle-n/ecbd81c97f2415a35356f197f9ccf965 to your computer and use it in GitHub Desktop.
Markdown <-> ProseMirror converter in Swift
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// | |
// MarkdownTransformer.swift | |
// Compose for Substack | |
// | |
// Created by Kyle Nazario on 7/19/20. | |
// | |
import Foundation | |
import Ink | |
import SwiftSoup | |
#if os(macOS) | |
import AppKit | |
#else | |
import UIKit | |
#endif | |
class MarkdownTransformer { | |
/// Creates a transformer instance.
///
/// Every conversion routine in this class is `static`, so the instance holds no state.
init() {}
/// Serializes a `BodyNode` tree to Markdown.
///
/// A prefix and suffix are chosen from the node's type (and, for paragraphs, from the
/// parent's type), inline marks are wrapped around them, and the children are rendered
/// recursively and concatenated in document order.
///
/// - Parameters:
///   - appConfig: Supplies the Markdown tokens used for list items, horizontal rules,
///     italics and bold.
///   - node: The node to serialize.
///   - previousSibling: The node immediately before `node` in its parent, if any. Used to
///     avoid re-opening an emphasis/strong run that the previous sibling already opened.
///   - nextSibling: The node immediately after `node` in its parent, if any. Used to avoid
///     closing an emphasis/strong run that the next sibling continues.
///   - parent: The node containing `node`, if any.
///   - orderedListID: The 1-based position of `node` when its parent is an ordered list.
/// - Returns: The Markdown for `node` and all of its descendants.
static func convertToMarkdown(appConfig: AppConfig, node: BodyNode, previousSibling: BodyNode? = nil, nextSibling: BodyNode? = nil, parent: BodyNode? = nil, orderedListID: Int? = nil) -> String {
    var prefix: String
    var suffix: String
    var nodeTextContent = node.text

    switch node.type {
    case .blockquote, .doc, .text, .hardBreak:
        prefix = ""
        suffix = ""
    case .paragraph where parent?.type == .listItem:
        prefix = ""
        suffix = "\n"
    case .paragraph where parent?.type == .blockquote:
        prefix = "> "
        suffix = "\n\n"
    case .paragraph:
        prefix = ""
        suffix = "\n\n"
    case .heading:
        let level = Int(node.attrs?.level ?? 1)
        // Clamp so that a malformed negative level cannot trap while building the marker.
        prefix = String(repeating: "#", count: max(level, 0)) + " "
        suffix = "\n\n"
    case .bulletList, .orderedList:
        prefix = ""
        suffix = "\n"
    case .listItem:
        if let position = orderedListID {
            prefix = String(position) + ". "
        } else {
            prefix = appConfig.listItem.rawValue + " "
        }
        suffix = ""
    case .horizontalRule:
        prefix = appConfig.horizontalRule.rawValue
        suffix = "\n\n"
    case .image:
        let alt: String = node.attrs?.alt ?? ""
        if let attrs = node.attrs {
            prefix = "![\(alt)](\(attrs.toImageURLWithQueryParams())"
        } else {
            // No attributes at all: emit an empty image reference instead of crashing.
            prefix = "![\(alt)]("
        }
        suffix = ")\n\n"
    case .button:
        let dataURL: String = node.attrs?.url ?? ""
        let buttonText: String = node.attrs?.text ?? ""
        prefix = "<button url=\"\(dataURL)\">\(buttonText)</button>"
        suffix = "\n\n"
    case .codeBlock:
        prefix = "```\n"
        suffix = "\n```\n\n"
    case .twitter:
        prefix = ""
        nodeTextContent = node.attrs?.url ?? "Could not load Twitter embed"
        suffix = "\n\n"
    case .youtube:
        prefix = ""
        if let videoId = node.attrs?.videoId {
            nodeTextContent = "https://www.youtube.com/watch?v=\(videoId)"
        } else {
            nodeTextContent = "Could not load YouTube embed"
        }
        suffix = "\n\n"
    case .vimeo:
        prefix = ""
        if let videoId = node.attrs?.videoId {
            nodeTextContent = "https://vimeo.com/\(videoId)"
        } else {
            nodeTextContent = "Could not load Vimeo embed"
        }
        suffix = "\n\n"
    case .spotify:
        prefix = ""
        nodeTextContent = node.attrs?.url ?? "Could not load Spotify embed"
        suffix = "\n\n"
    case .soundcloud:
        prefix = ""
        nodeTextContent = node.attrs?.url ?? "Could not load Soundcloud embed"
        suffix = "\n\n"
    }

    // Reports whether a (possibly absent) sibling carries a mark of the given type.
    func carriesMark(_ sibling: BodyNode?, _ markType: BodyNodeMark.MarkType) -> Bool {
        return sibling?.marks?.contains { $0.type == markType } ?? false
    }

    // Opens the tag only at the start of a run of equally-marked siblings and closes it
    // only at the end, so adjacent marked text nodes share one pair of delimiters.
    func processContinuousTag(tagType: BodyNodeMark.MarkType, matchingTag: String) {
        if !carriesMark(previousSibling, tagType) {
            prefix = matchingTag + prefix
        }
        if !carriesMark(nextSibling, tagType) {
            suffix += matchingTag
        }
    }

    for mark in node.marks ?? [] {
        switch mark.type {
        case .em:
            processContinuousTag(tagType: .em, matchingTag: appConfig.italics.rawValue)
        case .strong:
            processContinuousTag(tagType: .strong, matchingTag: appConfig.bold.rawValue)
        case .link:
            prefix += "["
            suffix = "](\(mark.attrs?.href ?? ""))" + suffix
        case .code:
            // The opening backtick goes innermost, so the closing one must too;
            // appending it would close it outside any emphasis/link delimiters.
            prefix += "`"
            suffix = "`" + suffix
        }
    }

    let children = node.content ?? []
    var computedTextOfChildContent = ""
    for (index, childNode) in children.enumerated() {
        let prevChild = index > 0 ? children[index - 1] : nil
        let nextChild = index + 1 < children.count ? children[index + 1] : nil
        // Only ordered lists number their items; numbering is 1-based.
        let childListID: Int? = node.type == .orderedList ? index + 1 : nil
        computedTextOfChildContent += MarkdownTransformer.convertToMarkdown(
            appConfig: appConfig,
            node: childNode,
            previousSibling: prevChild,
            nextSibling: nextChild,
            parent: node,
            orderedListID: childListID
        )
    }

    return prefix + (nodeTextContent ?? "") + computedTextOfChildContent + suffix
}
/// Decodes a JSON-encoded `BodyNode` and renders it as Markdown.
///
/// - Parameters:
///   - appConfig: Forwarded to the node-based `convertToMarkdown`.
///   - encodedNode: JSON text describing a `BodyNode`.
/// - Returns: The Markdown rendering of the decoded node.
/// - Precondition: `encodedNode` must decode as a `BodyNode`. The signature is
///   non-throwing, so a decoding failure traps with a descriptive message.
static func convertToMarkdown(appConfig: AppConfig, encodedNode: String) -> String {
    // `Data(_:)` over the UTF-8 view cannot fail, unlike `data(using:)`.
    let encodedData = Data(encodedNode.utf8)
    do {
        let node = try JSONDecoder().decode(BodyNode.self, from: encodedData)
        return convertToMarkdown(appConfig: appConfig, node: node)
    } catch {
        fatalError("MarkdownTransformer could not decode BodyNode JSON: \(error)")
    }
}
// MARK: - Embeds

// Each pattern is anchored at both ends. `getParagraphType` tests a paragraph's trimmed
// text against them, so only a paragraph consisting solely of the URL matches.
// `try!` is acceptable here: the patterns are compile-time literals.
private static let vimeoEmbedRegex = try! NSRegularExpression(pattern: "^https://(www\\.)?vimeo\\.com/\\d+$", options: [])
// YouTube video IDs may contain "-" as well as word characters, so "-" is allowed explicitly.
private static let youtubeEmbedRegex = try! NSRegularExpression(pattern: "^https://(www\\.)?youtube\\.com/watch\\?v=[\\w-]+$", options: [])
private static let twitterEmbedRegex = try! NSRegularExpression(pattern: "^https://(www\\.)?twitter\\.com/.*/status/[^\\s]*$", options: [])
private static let spotifyEmbedRegex = try! NSRegularExpression(pattern: "^https://(www\\.)?[\\w.]*\\.spotify\\.com/[^\\s]*$", options: [])
private static let soundcloudEmbedRegex = try! NSRegularExpression(pattern: "^https://(www\\.)?[\\w.]*soundcloud\\.com/[^\\s]*$", options: [])
/// Classifies a `<p>` element as an embed type or an ordinary paragraph.
///
/// The element's text, trimmed of surrounding whitespace, is tested against the embed URL
/// patterns in a fixed order (Vimeo, YouTube, Twitter, Spotify, SoundCloud).
///
/// - Parameter elem: The paragraph element to inspect.
/// - Returns: The first matching embed type, or `.paragraph` when nothing matches or the
///   element's text cannot be read.
private static func getParagraphType(elem: Element) -> BodyNode.BodyNodeType {
    guard let rawText = try? elem.text() else {
        return .paragraph
    }
    let text = rawText.trimmingCharacters(in: .whitespaces)
    // NSRange is measured in UTF-16 code units; `text.count` (Characters) would cut the
    // range short for text containing emoji or other multi-unit characters.
    let all = NSRange(text.startIndex..., in: text)

    let embedPatterns: [(regex: NSRegularExpression, type: BodyNode.BodyNodeType)] = [
        (MarkdownTransformer.vimeoEmbedRegex, .vimeo),
        (MarkdownTransformer.youtubeEmbedRegex, .youtube),
        (MarkdownTransformer.twitterEmbedRegex, .twitter),
        (MarkdownTransformer.spotifyEmbedRegex, .spotify),
        (MarkdownTransformer.soundcloudEmbedRegex, .soundcloud),
    ]
    let firstHit = embedPatterns.first { candidate in
        candidate.regex.firstMatch(in: text, options: [], range: all) != nil
    }
    return firstHit?.type ?? .paragraph
}
/// Converts Markdown into a JSON-encoded `BodyNode` document.
///
/// The Markdown is rendered to HTML with `MarkdownParser`, parsed with SwiftSoup, and the
/// `<body>` element is walked recursively to build the node tree.
///
/// - Parameters:
///   - account: Forwarded to `EmbedUplink.getEmbedDetails` for Twitter, Spotify and
///     SoundCloud nodes.
///   - markdown: The Markdown source to convert.
/// - Returns: The JSON encoding of the root node built from `<body>`.
/// - Throws: Any error raised while parsing the HTML, reading element attributes or text,
///   or encoding the result, and `ConversionError.missingBody` if no `<body>` node could
///   be produced.
static func convertToBodyNode(account: Account, markdown: String) throws -> String {
    enum ConversionError: Error {
        case missingBody
    }

    // Compiled once per conversion instead of once per visited DOM element.
    let headingTagRegex = try NSRegularExpression(pattern: "^h[0-9]$", options: [])

    // Returns copies of `nodes` with a mark of the given type appended to each.
    func adding(_ markType: BodyNodeMark.MarkType, to nodes: [BodyNode]) -> [BodyNode] {
        return nodes.map { original in
            var marked = original
            marked.marks = (marked.marks ?? []) + [BodyNodeMark(type: markType, attrs: nil)]
            return marked
        }
    }

    // Converts one DOM node into zero or more body nodes. `<strong>` and `<em>` return
    // their (marked) children directly, with no container node of their own.
    func htmlToBodyNode(_ domNode: Node) throws -> [BodyNode] {
        var node = BodyNode(
            type: .text,
            content: nil,
            text: nil,
            marks: nil,
            attrs: nil
        )
        var nodes: [BodyNode] = []
        var addNodeToSetAtEnd = true

        if let elem = domNode as? Element {
            let tag = elem.tagName()
            var immediateChild: BodyNode? = nil

            // Reads the node type (and, for Twitter, the cached tweet fields) from an
            // `<embed>` element's data attributes. Unknown types are logged and skipped
            // rather than crashing, since the markup can come straight from user input.
            func processEmbed(_ elem: Element) throws {
                let rawType = try elem.attr("data-type")
                guard let embedType = BodyNode.BodyNodeType(rawValue: rawType) else {
                    print("no match for embed type", rawType)
                    return
                }
                node.type = embedType
                guard embedType == .twitter else {
                    return
                }
                let retweetText = try elem.attr("data-retweetCount")
                let likeText = try elem.attr("data-likeCount")
                node.attrs = node.attrs ?? BodyNodeAttrs()
                node.attrs?.url = try elem.attr("data-url")
                node.attrs?.fullText = try elem.attr("data-fullText")
                node.attrs?.username = try elem.attr("data-username")
                node.attrs?.name = try elem.attr("data-name")
                node.attrs?.date = try elem.attr("data-date")
                node.attrs?.photos = []
                node.attrs?.quotedTweet = nil
                // Missing or non-numeric counters fall back to zero.
                node.attrs?.retweetCount = Int(retweetText) ?? 0
                node.attrs?.likeCount = Int(likeText) ?? 0
                node.attrs?.expandedUrl = Empty()
            }

            switch tag {
            case "body":
                node.type = .doc
            case "p":
                node.type = MarkdownTransformer.getParagraphType(elem: elem)
            case "strong", "em":
                addNodeToSetAtEnd = false
            case _ where headingTagRegex.firstMatch(in: tag, options: [], range: NSRange(tag.startIndex..., in: tag)) != nil:
                node.type = .heading
                // The heading level is the digit that ends the tag name.
                let level = tag.last.flatMap { Int(String($0)) }
                node.attrs = BodyNodeAttrs(level: level)
                node.content = [
                    BodyNode(type: .hardBreak),
                ]
            case "a":
                let href = try elem.attr("href")
                node.marks = [
                    BodyNodeMark(type: .link, attrs: BodyNodeAttrs(href: href, title: nil))
                ]
                node.text = try elem.text()
            case "ul":
                node.type = .bulletList
                node.attrs = BodyNodeAttrs(tight: false)
            case "ol":
                node.type = .orderedList
                node.attrs = BodyNodeAttrs(order: 1, tight: false)
            case "li":
                node.type = .listItem
                // List item content is wrapped in a paragraph node.
                immediateChild = BodyNode(type: .paragraph)
            case "hr":
                node.type = .horizontalRule
            case "img":
                let src = try elem.attr("src")
                let alt = try elem.attr("alt")
                node.type = .image
                node.attrs = BodyNodeAttrs(src: src, alt: alt)
                node.attrs?.setAttributesFromURLParams(URL(string: src))
            case "button":
                let url = try elem.attr("url")
                let label = try elem.text()
                node.type = .button
                node.attrs = BodyNodeAttrs(url: url, text: label)
            case "pre": // code block
                let code = try elem.text()
                node.type = .codeBlock
                node.attrs = BodyNodeAttrs(params: "")
                node.content = [
                    BodyNode(type: .text, text: code)
                ]
            case "code":
                node.marks = [BodyNodeMark(type: .code)]
                node.text = try elem.text()
            case "blockquote":
                node.type = .blockquote
            case "embed":
                try processEmbed(elem)
            default:
                print("no match for", elem.tagName())
            }

            // `<a>` and `<code>` already captured their full text above, so only their
            // element children are visited; everything else visits text nodes as well.
            let childNodes: [Node] = (tag == "a" || tag == "code") ? elem.children().array() : elem.getChildNodes()
            // A code block keeps only the single text node created above, so its
            // children do not need to be converted at all.
            var childContent: [BodyNode] = tag == "pre" ? [] : try childNodes.flatMap { try htmlToBodyNode($0) }

            // bold/italic tags should return arrays of nodes with no parent container
            if tag == "strong" {
                childContent = adding(.strong, to: childContent)
                nodes += childContent
            }
            if tag == "em" {
                childContent = adding(.em, to: childContent)
                nodes += childContent
            }

            // embeds should add their specific attrs
            if node.type == .vimeo {
                node.attrs = node.attrs ?? BodyNodeAttrs()
                let trimmed = try elem.text().trimmingCharacters(in: .whitespaces)
                // The video id is whatever follows ".com/" in the matched URL.
                let videoId: String = trimmed.components(separatedBy: ".com/").dropFirst().first ?? ""
                node.attrs?.videoId = videoId
            }
            if node.type == .youtube {
                node.attrs = node.attrs ?? BodyNodeAttrs()
                let trimmed = try elem.text().trimmingCharacters(in: .whitespaces)
                if let url = URL(string: trimmed) {
                    let videoId: String = url.queryParameters?["v"] ?? ""
                    // The YouTube node is emitted as the only child of a paragraph.
                    node.type = .paragraph
                    node.content = [BodyNode(type: .youtube, attrs: BodyNodeAttrs(videoId: videoId))]
                    childContent = []
                }
            }

            // embeds that require loading content from the editor
            // NOTE(review): for an <embed data-type="twitter"> element this replaces the
            // attrs that processEmbed just read from the data attributes — confirm intended.
            let embedsRequiringAPIData: [BodyNode.BodyNodeType] = [.twitter, .spotify, .soundcloud]
            if embedsRequiringAPIData.contains(node.type) {
                let embedURL = try elem.text().trimmingCharacters(in: .whitespaces)
                node.attrs = EmbedUplink.getEmbedDetails(account: account, embedType: node.type, embedURL: embedURL)
                node.text = nil
            }

            if var wrapper = immediateChild {
                wrapper.content = (wrapper.content ?? []) + childContent
                node.content = [wrapper]
            } else {
                node.content = (node.content ?? []) + childContent
            }
        } else if let textNode = domNode as? TextNode {
            node.text = textNode.text()
        }

        if node.type == .button || node.type == .image {
            node.content = nil
        }
        // NOTE(review): .spotify is not in this list although it is handled like
        // .twitter/.soundcloud above, so Spotify nodes keep their child content —
        // confirm whether that is intentional before changing it.
        if node.type == .vimeo || node.type == .twitter || node.type == .soundcloud {
            node.content = nil
            node.text = nil
        }
        if addNodeToSetAtEnd {
            nodes.append(node)
        }
        return nodes
    }

    let htmlMarkup: String = MarkdownParser().html(from: markdown)
    let html: Document = try SwiftSoup.parse(htmlMarkup)
    guard let bodyElement = try html.select("body").array().first,
          let body = try htmlToBodyNode(bodyElement).first else {
        throw ConversionError.missingBody
    }
    let encodedBody = try JSONEncoder().encode(body)
    return String(decoding: encodedBody, as: UTF8.self)
}
// MARK: - Editor syntax highlighting

// Sizes used by `toAttributedString` when the editor font is not San Francisco:
// `fontSize` for body text and `sansSerifFontSize` for inline code.
private static let fontSize: CGFloat = 20
private static let sansSerifFontSize: CGFloat = 16

// Patterns used by `toAttributedString` to locate Markdown constructs in raw editor text.
// `try!` is acceptable here: the patterns are compile-time literals.
static let inlineCodeRegex = try! NSRegularExpression(pattern: "`[^`]*`", options: [])
static let codeBlockRegex = try! NSRegularExpression(pattern: "(`){3}((?!\\1).)+\\1{3}", options: [.dotMatchesLineSeparators])
static let headingRegex = try! NSRegularExpression(pattern: "^#{1,6}\\s.*$", options: [.anchorsMatchLines])
static let linkOrImageRegex = try! NSRegularExpression(pattern: "!?\\[([^\\[\\]]*)\\]\\((.*?)\\)", options: [])
static let boldRegex = try! NSRegularExpression(pattern: "((\\*|_){2})((?!\\1).)+\\1", options: [])
// The trailing lookahead rejects a following underscore, mirroring the leading
// lookbehind, in the same way the asterisk variant guards against "*" on both sides.
static let underscoreEmphasisRegex = try! NSRegularExpression(pattern: "(?<!_)_[^_]+_(?!_)", options: [])
static let asteriskEmphasisRegex = try! NSRegularExpression(pattern: "(?<!\\*)(\\*)((?!\\1).)+\\1(?!\\*)", options: [])
static let boldEmphasisAsteriskRegex = try! NSRegularExpression(pattern: "(\\*){3}((?!\\1).)+\\1{3}", options: [])
static let blockquoteRegex = try! NSRegularExpression(pattern: "^>.*", options: [.anchorsMatchLines])
static let horizontalRuleRegex = try! NSRegularExpression(pattern: "\n\n(-{3}|\\*{3})\n", options: [])
static let unorderedListRegex = try! NSRegularExpression(pattern: "^(\\-|\\*)\\s", options: [.anchorsMatchLines])
// An ordered-list marker needs at least one digit; `\d*` also matched a bare ". ".
static let orderedListRegex = try! NSRegularExpression(pattern: "^\\d+\\.\\s", options: [.anchorsMatchLines])
static let buttonRegex = try! NSRegularExpression(pattern: "<\\s*button[^>]*>(.*?)<\\s*/\\s*button>", options: [])
static let embedRegex = try! NSRegularExpression(pattern: "^https://[\\w.]+\\.com/[^\\s]*$", options: [.anchorsMatchLines])
/// Builds a syntax-highlighted attributed string for raw Markdown editor text.
///
/// Base font, colour and paragraph style are applied to the whole string. Each Markdown
/// construct found by the class's highlighting regexes is then restyled in a fixed order;
/// later passes override earlier ones where they set the same attribute.
///
/// - Parameters:
///   - text: The Markdown source shown in the editor.
///   - appConfig: Supplies the editor font choice.
/// - Returns: The styled string. The characters themselves are unchanged.
static func toAttributedString(_ text: String, appConfig: AppConfig) -> NSAttributedString {
    let attributedString = NSMutableAttributedString(string: text)
    // NSRange is measured in UTF-16 code units. `text.count` (Characters) is shorter than
    // that whenever the text contains emoji, which left the tail of the text unstyled.
    let all = NSRange(location: 0, length: attributedString.length)

    let editorFont: UIFont
    let editorFontSize: CGFloat
    let codeFontSize: CGFloat
    let paragraphStyle = NSMutableParagraphStyle()
    if appConfig.editorFont == .sanFrancisco {
        editorFontSize = 20
        codeFontSize = 18
        editorFont = UIFont.systemFont(ofSize: editorFontSize)
        paragraphStyle.lineSpacing = 1.5
    } else {
        editorFontSize = MarkdownTransformer.fontSize
        codeFontSize = MarkdownTransformer.sansSerifFontSize
        // Fall back to the system font rather than crashing if the configured font
        // is not available.
        editorFont = UIFont(name: appConfig.editorFont.rawValue, size: editorFontSize)
            ?? UIFont.systemFont(ofSize: editorFontSize)
    }

    // Ranges of every match of `regex` in the full text.
    func ranges(matching regex: NSRegularExpression) -> [NSRange] {
        return regex.matches(in: text, options: [], range: all).map { $0.range }
    }

    // Sets one attribute on every match of `regex`.
    func setAttribute(_ key: NSAttributedString.Key, to value: Any, matching regex: NSRegularExpression) {
        for range in ranges(matching: regex) {
            attributedString.addAttribute(key, value: value, range: range)
        }
    }

    // Derives a new font from the one already on each match and applies it to the whole
    // match. When a match spans several font runs, the last run's font is the one that is
    // transformed. A match with no font attribute is left untouched.
    func restyleFont(matching regexes: [NSRegularExpression], with transform: (UIFont) -> UIFont) {
        for range in regexes.flatMap({ ranges(matching: $0) }) {
            var restyled: UIFont? = nil
            attributedString.enumerateAttributes(in: range, options: []) { attributes, _, _ in
                if let font = attributes[.font] as? UIFont {
                    restyled = transform(font)
                }
            }
            if let restyled = restyled {
                attributedString.addAttribute(.font, value: restyled, range: range)
            }
        }
    }

    attributedString.addAttribute(.foregroundColor, value: UIColor.label, range: all)
    attributedString.addAttribute(.font, value: editorFont, range: all)
    attributedString.addAttribute(.paragraphStyle, value: paragraphStyle, range: all)

    setAttribute(.font, to: UIFont.monospacedSystemFont(ofSize: codeFontSize, weight: .thin), matching: MarkdownTransformer.inlineCodeRegex)
    setAttribute(.foregroundColor, to: UIColor.lightGray, matching: MarkdownTransformer.horizontalRuleRegex)

    restyleFont(matching: [MarkdownTransformer.headingRegex]) { font in
        font.with(.traitBold, .traitExpanded).withSize(editorFontSize * 1.25)
    }
    setAttribute(.kern, to: 0.5, matching: MarkdownTransformer.headingRegex)

    restyleFont(matching: [MarkdownTransformer.boldRegex]) { font in
        font.with(.traitBold)
    }
    restyleFont(matching: [MarkdownTransformer.asteriskEmphasisRegex, MarkdownTransformer.underscoreEmphasisRegex]) { font in
        font.with(.traitItalic)
    }
    restyleFont(matching: [MarkdownTransformer.boldEmphasisAsteriskRegex]) { font in
        font.with(.traitItalic, .traitBold)
    }

    setAttribute(.foregroundColor, to: UIColor.lightGray, matching: MarkdownTransformer.unorderedListRegex)
    setAttribute(.foregroundColor, to: UIColor.lightGray, matching: MarkdownTransformer.orderedListRegex)
    setAttribute(.underlineStyle, to: NSUnderlineStyle.single.rawValue, matching: MarkdownTransformer.linkOrImageRegex)
    setAttribute(.backgroundColor, to: UIColor.secondarySystemBackground, matching: MarkdownTransformer.codeBlockRegex)
    setAttribute(.foregroundColor, to: UIColor.lightGray, matching: MarkdownTransformer.buttonRegex)
    setAttribute(.backgroundColor, to: UIColor.secondarySystemBackground, matching: MarkdownTransformer.blockquoteRegex)
    setAttribute(.underlineStyle, to: NSUnderlineStyle.single.rawValue, matching: MarkdownTransformer.embedRegex)

    return attributedString
}
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment