Skip to content

Instantly share code, notes, and snippets.

Created November 26, 2023 21:55
Show Gist options
  • Save douglashill/1f8ed210db3dfc1d4dac4f05061ceadf to your computer and use it in GitHub Desktop.
Save douglashill/1f8ed210db3dfc1d4dac4f05061ceadf to your computer and use it in GitHub Desktop.
Writes a Markdown document to HTML. The primary goal is for output to match the output of Markdown.pl 1.0.1 as closely as possible.
// `@testable` needed to access `_data`
@testable import Markdown // This package:
/// Writes a Markdown document to HTML.
/// The primary goal is for output to match the output of Markdown.pl 1.0.1 as closely as possible.
///
/// Usage:
/// ```swift
/// let document = Document(parsing: "Convert *Markdown* to **HTML**.", options: [.disableSmartOpts])
/// var htmlFormatter = HTMLFormatter()
/// htmlFormatter.visit(document)
/// print(htmlFormatter.output) // <p>Convert <em>Markdown</em> to <strong>HTML</strong>.</p>
/// ```
/// Swift Markdown makes all quotation marks curly, but they shouldn’t be for foot and inch marks
/// so parse the document using the `.disableSmartOpts` option.
///
/// Known discrepancies from Markdown.pl 1.0.1:
/// - All instances of `>` will be replaced with `&gt;` instead of only some.
/// - Nested lists won’t include as many line breaks.
/// - There won’t be empty lines between multiple paragraphs in a list item.
struct HTMLFormatter: MarkupVisitor {
/// The HTML generated so far. Every visit method of this type appends to this string.
var output = ""
/// Generic tag-wrapping handler.
///
/// Picks an HTML tag name from the node's type (a `Link` with a destination additionally
/// contributes an `href` attribute), appends the opening tag to `output`, iterates over the
/// node's children, and appends the closing tag followed by newlines that depend on the kind
/// of node. A node type that is not matched by the chain ends in `fatalError`.
///
/// NOTE(review): this listing appears to have lost its closing braces, and possibly some
/// statements, during extraction. Restore them from the original before compiling.
mutating func defaultVisit(_ markup: Markup) -> Void {
let tag: String
var attributes: [(name: String, value: String)] = []
// Map the node type to its HTML element name.
if markup is Strong {
tag = "strong"
} else if markup is Emphasis {
tag = "em"
} else if markup is Paragraph {
tag = "p"
} else if markup is Document {
tag = "body"
} else if markup is BlockQuote {
tag = "blockquote"
} else if markup is ListItem {
tag = "li"
} else if markup is OrderedList {
tag = "ol"
} else if markup is UnorderedList {
tag = "ul"
} else if let heading = markup as? Heading {
// Heading levels map directly onto h1, h2, …
tag = "h\(heading.level)"
} else if let link = markup as? Link {
// Only links that have a destination get an href attribute.
if let destination = link.destination {
attributes.append((name: "href", value: destination))
tag = "a"
} else {
fatalError("Unknown node: \(String(describing: type(of: markup)))")
// A paragraph directly inside a list item is written without its <p> wrapper when the
// enclosing list reports `skipParagraphsInItems`. The force-unwraps and the force-cast
// only run when the parent is a `ListItem`, because `&&` short-circuits.
let skipTag = markup is Paragraph && markup.parent is ListItem && (markup.parent!.parent! as! ListItemContainer).skipParagraphsInItems
if !skipTag {
output += "<\(tag)"
for attribute in attributes {
// NOTE(review): the interpolation on the next line looks truncated by extraction;
// presumably it should write `name="value"` with the escaped value. Confirm against the original.
output += " \(\"\(attribute.value.replacingWithHTMLEntities())\""
output += ">"
// Lists put a newline after their opening tag.
if markup is ListItemContainer {
output += "\n"
if markup is BlockQuote {
// Indent only the first child of a blockquote to match Markdown.pl.
output += "\n "
for child in markup.children {
if markup is BlockQuote && output.last == "\n" {
if !skipTag {
output += "</\(tag)>"
// List items end with a single newline. Other block-level nodes, except a paragraph
// that sits directly inside a list item, end with a blank line.
if markup is ListItem {
output += "\n"
} else if markup is BlockMarkup && !(markup is Paragraph && markup.parent is ListItem) {
output += "\n\n"
/// Handles the document root by iterating over its children without writing a wrapping tag.
mutating func visitDocument(_ document: Document) -> () {
// Adding a <body> tag is usually not useful.
// NOTE(review): the loop body and the closing braces are missing from this listing;
// presumably each child is visited. Confirm against the original.
for child in document.children {
/// Appends an `<img />` element for `image` to `output`.
///
/// The alt text is the concatenated text of the image's descendants. The `src`, `alt` and
/// `title` values are escaped before being written so that `&`, `<`, `>` or `"` in a URL,
/// alt text or title cannot produce malformed HTML. The `title` attribute is always
/// written, even when the image has no title.
mutating func visitImage(_ image: Image) -> () {
    // Alt text such as `![*logo*](logo.png)` parses into nested inline nodes rather than a
    // single `Text` child, so gather the text recursively instead of force-casting the child.
    func plainText(of markup: Markup) -> String {
        if let text = markup as? Text {
            return text.string
        }
        if let code = markup as? InlineCode {
            return code.code
        }
        return markup.children.map { plainText(of: $0) }.joined()
    }

    // Escape `&` first (inside `replacingWithHTMLEntities`) so that the `&quot;` added
    // afterwards is not escaped a second time.
    func attributeValue(_ raw: String) -> String {
        raw.replacingWithHTMLEntities().replacingOccurrences(of: "\"", with: "&quot;")
    }

    let altText = image.children.map { plainText(of: $0) }.joined()
    let source = attributeValue(image.source ?? "")
    let alt = attributeValue(altText)
    let title = attributeValue(image.title ?? "")
    output += "<img src=\"\(source)\" alt=\"\(alt)\" title=\"\(title)\" />"
}
/// Appends the inline code's text, entity-escaped, wrapped in `<code>` tags.
/// Traps if the node unexpectedly has children.
mutating func visitInlineCode(_ inlineCode: InlineCode) -> () {
    let hasChild = inlineCode.children.contains(where: { _ in true })
    if hasChild {
        fatalError("Inline code node should not have any children.")
    }
    let escapedCode = inlineCode.code.replacingWithHTMLEntities()
    output.append("<code>" + escapedCode + "</code>")
}
/// Appends the text node's string to `output` after entity-escaping it.
/// Traps if the node unexpectedly has children.
mutating func visitText(_ markup: Text) -> Void {
    let hasChild = markup.children.contains(where: { _ in true })
    if hasChild {
        fatalError("Text node should not have any children.")
    }
    let escapedText = markup.string.replacingWithHTMLEntities()
    output.append(escapedText)
}
/// Appends the block's code, entity-escaped, inside `<pre><code>`, followed by a blank line.
mutating func visitCodeBlock(_ codeBlock: CodeBlock) -> () {
    let escapedCode = codeBlock.code.replacingWithHTMLEntities()
    output.append("<pre><code>" + escapedCode + "</code></pre>\n\n")
}
/// Appends a raw HTML block to `output`, replacing tab characters in it.
///
/// Raw HTML that starts with one of the known block-level elements is written as is,
/// followed by a newline. Anything else is wrapped in `<p>` and followed by a blank line.
///
/// NOTE(review): the contents of `blockElements`, some statements (presumably an early
/// return after the block-level case and whatever mutates `editableHTML`) and the closing
/// braces are missing from this listing. Restore them from the original before compiling.
mutating func visitHTMLBlock(_ html: HTMLBlock) -> () {
// List taken from _HashHTMLBlocks in Markdown.pl.
let blockElements = [
for blockElement in blockElements {
// Matches on the opening tag's name prefix only.
if html.rawHTML.hasPrefix("<\(blockElement)") {
output += "\(html.rawHTML.replacingOccurrences(of: "\t", with: " "))\n"
// Otherwise it’s an inline element, so wrap it in a paragraph.
var editableHTML = html.rawHTML
output += "<p>\(editableHTML.replacingOccurrences(of: "\t", with: " "))</p>\n\n"
/// Appends inline raw HTML to `output` unchanged, with no escaping.
mutating func visitInlineHTML(_ html: InlineHTML) -> () {
    let rawHTML = html.rawHTML
    output.append(rawHTML)
}
/// Appends a plain newline for a soft break.
mutating func visitSoftBreak(_ softBreak: SoftBreak) -> () {
    // Soft breaks seem to appear where consecutive source lines have no empty line between them.
    output.append("\n")
}
/// Appends a `<br />` element, preceded by a space and followed by a newline, for a hard line break.
mutating func visitLineBreak(_ lineBreak: LineBreak) -> () {
    output.append(" <br />\n")
}
/// Appends an `<hr />` element followed by a blank line for a thematic break.
mutating func visitThematicBreak(_ thematicBreak: ThematicBreak) -> () {
    output.append("<hr />\n\n")
}
private extension String {
    /// Returns a copy of the string with `&`, `<` and `>` replaced by HTML entities and
    /// each tab character replaced by a space.
    func replacingWithHTMLEntities() -> String {
        // Applied in order. `&` must come first so the ampersands introduced by the later
        // entities are not escaped a second time.
        let rules: [(String, String)] = [
            ("&", "&amp;"),
            ("<", "&lt;"),
            // Markdown.pl replaces `>` for some inputs but not for very similar-looking
            // ones. The entity is mostly unnecessary, but always emitting it stays closer
            // to that output overall.
            (">", "&gt;"),
            // Like _Detab in Markdown.pl.
            // NOTE(review): _Detab expands tabs to spaces; confirm that a single space is
            // the intended replacement width here.
            ("\t", " "),
        ]
        return rules.reduce(self) { escaped, rule in
            escaped.replacingOccurrences(of: rule.0, with: rule.1)
        }
    }
}
private extension ListItemContainer {
    /// Whether the `Paragraph` children of this list's `ListItem` children should be written
    /// without their `<p>` wrapper.
    ///
    /// Swift Markdown always wraps list item content in a `Paragraph`, even where none is
    /// wanted, so this falls back on the parser's internal source positions to tell the
    /// cases apart. The heuristic compares the start lines of consecutive children and is
    /// not very robust.
    var skipParagraphsInItems: Bool {
        var precedingStartLine: Int?
        var referenceGap: Int?

        // Walking every child makes the overall cost quadratic, because this property is
        // read once per child while the list is being formatted.
        for item in children {
            // NOTE(review): force-unwrapping traps for a child without a source range;
            // presumably every node of a parsed document has one. Confirm.
            let startLine = item._data.range!.lowerBound.line
            if let previous = precedingStartLine {
                let gap = startLine - previous
                switch referenceGap {
                case .none:
                    // The first pair of children sets the spacing that the rest are compared with.
                    referenceGap = gap
                case .some(let expected) where expected != gap:
                    // Uneven spacing between items.
                    return true
                case .some:
                    break
                }
            }
            precedingStartLine = startLine
        }

        // With fewer than two children there is no gap to measure (single-item lists):
        // leave the paragraph out to match Markdown.pl. Otherwise skip paragraphs only
        // when the items start on consecutive lines.
        return referenceGap.map { $0 == 1 } ?? true
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment