|
/* |
|
Markdown-ish parser. Regular expressions are not allowed in here! |
|
|
|
The parser works in several stages: |
|
|
|
1. split up the text into lines |
|
2. combine lines into logical blocks |
|
3. tokenize the contents of the blocks |
|
|
|
The result of this process is a tree structure that describes the contents of |
|
the file. This tree can then be rendered to HTML, for example. |
|
*/ |
|
|
|
import Foundation |
|
|
|
// MARK: - String Methods |
|
|
|
extension String { |
|
func replace(s1: String, with s2: String) -> String { |
|
return self.stringByReplacingOccurrencesOfString(s1, withString: s2) |
|
} |
|
|
|
/* |
|
* HTML escapes a string. |
|
*/ |
|
func escape() -> String { |
|
var s = self |
|
s = s.replace("&", with: "&") |
|
s = s.replace("\"", with: """) |
|
s = s.replace("'", with: "'") |
|
s = s.replace("<", with: "<") |
|
s = s.replace(">", with: ">") |
|
return s |
|
} |
|
} |
|
|
|
// MARK: - Types |
|
|
|
struct Markdown { |
|
/* |
|
* The text from a Fragment is converted to a stream of Tokens. This allows |
|
* for the separation of actual text from the control characters that modify |
|
* the appearance of the text. |
|
*/ |
|
enum Token { |
|
case Spacing // any amount of whitespace |
|
case Text(text: String) // words |
|
case Escape(symbol: String) // --, <, >, ', & and so on |
|
case Open(symbol: String) // ", *, **, ~~, ` spans |
|
case Close(symbol: String) |
|
case Link(fragment: Fragment, url: String) |
|
case Image(caption: String, url: String) |
|
} |
|
|
|
/* |
|
* Roughly speaking, each line in the input document corresponds to a fragment, |
|
* but the fragment will have any whitespace trimmed off. The last newline is |
|
* also stripped. If the line starts a new block, such as `- list item` then |
|
* the `-` is also stripped from the fragment. |
|
* |
|
* Note: Source code is not tokenized at the moment, but could be in the future |
|
* to support syntax highlighting. (Note: .Code blocks consist of only a single |
|
* fragment, so such fragments may include newlines.) |
|
*/ |
|
enum Fragment { |
|
case Text(tokens: [Token]) |
|
case Code(code: String) |
|
} |
|
|
|
/* |
|
* The possible types of top-level blocks. |
|
*/ |
|
enum BlockType { |
|
case Empty // just for parsing |
|
case Header(level: Int) // # |
|
case Text // regular paragraph of text |
|
case Quote // > |
|
case Code(language: String) // ```language |
|
case CodeIndented // 4 spaces or tab |
|
case ListItem(ordered: Bool) // 1. or - |
|
} |
|
|
|
/* |
|
* Describes a top-level block. The lines from the input document are grouped |
|
* into such blocks. Each block will have one or more Fragments. |
|
*/ |
|
struct Block { |
|
var type: BlockType |
|
var fragments: [Fragment] = [] |
|
|
|
init(type: BlockType) { |
|
self.type = type |
|
} |
|
} |
|
|
|
private let input: String // the Markdown text |
|
private var blocks: [Block] = [] // the top-level blocks |
|
} |
|
|
|
// MARK: - Public API |
|
|
|
extension Markdown { |
|
init(string input: String) { |
|
self.input = input |
|
blocks = parseBlocks(parseLines()) |
|
} |
|
|
|
func tree() -> [Block] { |
|
return blocks |
|
} |
|
|
|
mutating func removeBlockAtIndex(index: Int) { |
|
blocks.removeAtIndex(index) |
|
} |
|
} |
|
|
|
// MARK: - Workarounds for Swift Issues |
|
|
|
/* |
|
* For debugging only. Letting Swift do this automatically doesn't work |
|
* very dependably yet (Xcode 7 beta 5). |
|
*/ |
|
extension Markdown.Token: CustomStringConvertible { |
|
var description: String { |
|
switch self { |
|
case .Spacing: return "Spacing" |
|
case .Text(let text): return text |
|
case .Escape(let symbol): return "Escape(\(symbol))" |
|
case .Open(let symbol): return "Open(\(symbol))" |
|
case .Close(let symbol): return "Close(\(symbol))" |
|
case .Link(let fragment, let url): return "Link(\(fragment),\(url))" |
|
case .Image(let caption, let url): return "Image(\(caption),\(url))" |
|
} |
|
} |
|
} |
|
|
|
extension Markdown.BlockType: CustomStringConvertible { |
|
var description: String { |
|
switch self { |
|
case .Empty: return "@Empty" |
|
case .Header(let level): return "@Header:\(level)" |
|
case .Text: return "@Text" |
|
case .Code(let language): return "@Code:\(language)" |
|
case .CodeIndented: return "@CodeIndented" |
|
case .Quote: return "@Quote" |
|
case .ListItem(let ordered): return "@ListItem:\(ordered)" |
|
} |
|
} |
|
} |
|
|
|
/* |
|
* These helper methods are necessary because `if case` cannot be combined with |
|
* other conditions. |
|
* |
|
* In Swift 2.0 you can't write: if case .Quote = foo && bar { ... } |
|
* or: if !case .Empty |
|
* or: if case .Empty || case .Header || case .Code |
|
* |
|
* But at least you can now write `if foo.isQuote() && bar { ... }` |
|
*/ |
|
private extension Markdown.BlockType { |
|
func isEmpty() -> Bool { |
|
if case .Empty = self { return true } else { return false } |
|
} |
|
|
|
func isCode() -> Bool { |
|
if case .Code = self { return true } else { return false } |
|
} |
|
|
|
func isQuote() -> Bool { |
|
if case .Quote = self { return true } else { return false } |
|
} |
|
|
|
func isListItem() -> Bool { |
|
if case .ListItem = self { return true } else { return false } |
|
} |
|
|
|
func isOrderedList() -> Bool { |
|
if case ListItem(let ordered) = self { return ordered } else { return false } |
|
} |
|
|
|
func isEmpty_Header_Code() -> Bool { |
|
switch self { |
|
case .Empty, .Header, .Code: return true |
|
default: return false |
|
} |
|
} |
|
|
|
func isText_CodeIndented() -> Bool { |
|
switch self { |
|
case .Text, .CodeIndented: return true |
|
default: return false |
|
} |
|
} |
|
|
|
func shouldTrimWhitespace() -> Bool { |
|
switch self { |
|
case .Code, .CodeIndented: return false |
|
default: return true |
|
} |
|
} |
|
} |
|
|
|
// MARK: - Supporting Methods |
|
|
|
private extension Character { |
|
func isWhitespace() -> Bool { |
|
return self == " " || self == "\t" || self == "\r" |
|
} |
|
|
|
func isWhitespaceOrNewline() -> Bool { |
|
return isWhitespace() || self == "\n" |
|
} |
|
} |
|
|
|
private extension Markdown { |
|
func eatLeadingWhitespace(startIndex: String.Index, _ endIndex: String.Index) -> String.Index { |
|
for var i = startIndex; i < endIndex; i = i.successor() { |
|
if !input[i].isWhitespaceOrNewline() { return i } |
|
} |
|
return endIndex |
|
} |
|
|
|
func eatTrailingWhitespace(startIndex: String.Index, _ endIndex: String.Index) -> String.Index { |
|
for var i = endIndex.predecessor(); i >= startIndex; i = i.predecessor() { |
|
if !input[i].isWhitespaceOrNewline() { return i.successor() } |
|
} |
|
return startIndex |
|
} |
|
} |
|
|
|
// MARK: - Splitting Into Lines |
|
|
|
private extension Markdown { |
|
/* |
|
* Determine the indices in the input document at which new lines begin. |
|
*/ |
|
func parseLines() -> [String.Index] { |
|
var lines: [String.Index] = [] |
|
var i = input.startIndex |
|
while i < input.endIndex { |
|
let c = input[i] |
|
i = i.successor() |
|
|
|
// For convenience, the end of the string is recorded twice so the block |
|
// scanning logic doesn't need a separate check for end-of-text. When the |
|
// end is reached, it simply sees one final .Empty block. |
|
// It may not be immediately obvious, but the check for endIndex here |
|
// makes sure this happens whether the text ends with a newline or not. |
|
if c == "\n" || i == input.endIndex { |
|
lines.append(i) |
|
} |
|
} |
|
lines.append(i) // add the endIndex again |
|
return lines |
|
} |
|
} |
|
|
|
// MARK: - Top-Level Blocks |
|
|
|
private extension Markdown { |
|
/* |
|
* Determine the top-level blocks in the file. |
|
*/ |
|
func parseBlocks(lines: [String.Index]) -> [Block] { |
|
var blocks: [Block] = [] |
|
var lineStart = input.startIndex |
|
var fragmentStart = lineStart |
|
var fragmentEnd = lineStart |
|
var block = Block(type: .Empty) |
|
var count = 0 |
|
|
|
for lineEnd in lines { |
|
// This looks at the next line. If the type of this line is different from |
|
// the current block, then we may need to end the block and make a new one. |
|
// Exactly how depends on the particular block type. The "adjusted" start |
|
// index is for skipping the symbol that identifies the line. |
|
var (nextType, adjustedLineStart) = identify(lineStart, lineEnd) |
|
|
|
// A fragment should not have leading or trailing whitespace or a newline. |
|
if nextType.shouldTrimWhitespace() { |
|
adjustedLineStart = eatLeadingWhitespace(adjustedLineStart, lineEnd) |
|
} |
|
let adjustedLineEnd = eatTrailingWhitespace(adjustedLineStart, lineEnd) |
|
|
|
//print("LINE '" + input.substringWithRange(lineStart ..< lineEnd) + "' is type \(nextType)") |
|
|
|
// Because the endIndex appears twice in the array, we can easily detect |
|
// whether scanning has reached the end of the input. (By the way, the |
|
// type of that last "line" is .Empty, so when checking for the Empty state |
|
// we don't need to look at endOfInput also.) |
|
let endOfInput = (adjustedLineStart == input.endIndex) |
|
|
|
// It may be a bit weird to define inner functions here, but this allows |
|
// them to use variables such as `nextType` and `block`, without passing |
|
// those as parameters. |
|
|
|
func beginNewBlock() { |
|
block = Block(type: nextType) |
|
fragmentStart = adjustedLineStart |
|
count = 0 |
|
} |
|
|
|
func addTextFragment() { |
|
let tokenized = tokenize(startIndex: fragmentStart, endIndex: fragmentEnd) |
|
let fragment = Fragment.Text(tokens: tokenized) |
|
block.fragments.append(fragment) |
|
} |
|
|
|
func addCodeFragment() { |
|
let s = input.substringWithRange(fragmentStart ..< fragmentEnd) |
|
let fragment = Fragment.Code(code: s) |
|
block.fragments.append(fragment) |
|
} |
|
|
|
func finishBlock() { |
|
blocks.append(block) |
|
} |
|
|
|
// The current fragment always refers to the previous line(s). It is given |
|
// by `fragmentStart` and `lineStart`, which is the end of the previous line |
|
// and also the start of this one. So we don't immediately add new fragments, |
|
// we always want to look at the next line first. |
|
|
|
switch block.type { |
|
case .Empty: |
|
if !nextType.isEmpty() { |
|
beginNewBlock() |
|
} |
|
|
|
case .Header: |
|
// A header is always just one line, so we can immediately add this block. |
|
addTextFragment() |
|
finishBlock() |
|
beginNewBlock() |
|
|
|
case .Text: |
|
addTextFragment() |
|
|
|
// A text block ends when the next line is empty, a header, or code. |
|
if nextType.isEmpty_Header_Code() { |
|
finishBlock() |
|
beginNewBlock() |
|
} else { |
|
fragmentStart = lineStart |
|
} |
|
|
|
case .Quote: |
|
// Each line in a quote is added as a new fragment, and we strip off |
|
// the leading > character. |
|
addTextFragment() |
|
|
|
// A quote ends when the next line is empty, a header, or code. |
|
if nextType.isEmpty_Header_Code() { |
|
blocks.append(block) |
|
beginNewBlock() |
|
} else { |
|
// Any other type of line also gets added to the quote. If it |
|
// starts with >, we strip that off. |
|
if case .Quote = nextType { |
|
fragmentStart = adjustedLineStart |
|
} else { |
|
fragmentStart = lineStart |
|
} |
|
} |
|
|
|
case .Code: |
|
// A code block ends after a closing line of ``` backticks. |
|
if nextType.isCode() || endOfInput { |
|
addCodeFragment() |
|
finishBlock() |
|
block = Block(type: .Empty) |
|
} |
|
|
|
case .CodeIndented: |
|
// Each line in an indented code block is added as a new fragment, |
|
// allowing us to strip off the leading spaces/tabs. |
|
if count == 0 { |
|
addCodeFragment() |
|
} |
|
|
|
// If the next line is empty, keep going. If the empty line(s) is/are |
|
// followed by more code, then we'll insert empty fragments. |
|
if nextType.isEmpty() && !endOfInput { |
|
++count |
|
fragmentStart = lineEnd.predecessor() |
|
} else if case .CodeIndented = nextType { |
|
// If the next line is also an indented code block, then keep going. |
|
// If we've seen empty lines, then add an empty fragment for each line. |
|
if count > 0 { |
|
fragmentEnd = fragmentStart |
|
for _ in 1...count { addCodeFragment() } |
|
count = 0 |
|
} |
|
fragmentStart = adjustedLineStart |
|
} else { |
|
// If the next line is any other kind of block, then the indented |
|
// code block has ended. |
|
finishBlock() |
|
beginNewBlock() |
|
} |
|
|
|
case .ListItem: |
|
// Each line in a list item is added as a new fragment, and we strip |
|
// off the leading - character. |
|
addTextFragment() |
|
|
|
// If the next line is text or indented code, then interpret this as |
|
// another fragment that also belongs to this list item. |
|
if nextType.isText_CodeIndented() { |
|
fragmentStart = adjustedLineStart |
|
} else { |
|
blocks.append(block) |
|
beginNewBlock() |
|
} |
|
} |
|
|
|
lineStart = lineEnd |
|
fragmentEnd = adjustedLineEnd |
|
} |
|
return blocks |
|
} |
|
} |
|
|
|
// MARK: - Line Identification |
|
|
|
private extension Markdown { |
|
typealias LineType = (BlockType, String.Index) |
|
|
|
/* |
|
* Scans the beginning of the line in order to identify what sort of line |
|
* this is. |
|
* |
|
* Returns a new String.Index that points at the beginning of the actual text, |
|
* having skipped the identifying characters (#, .1, -) but not necessarily |
|
* any whitespace. |
|
*/ |
|
func identify(startIndex: String.Index, _ endIndex: String.Index) -> LineType { |
|
var i = startIndex |
|
|
|
func header() -> LineType { |
|
let textStart = i |
|
var count = 1 |
|
i = i.successor() |
|
for ; i < endIndex; i = i.successor() { |
|
if input[i] == "#" { |
|
++count |
|
} else { |
|
let headerStart = i |
|
i = i.successor() |
|
for ; i < endIndex; i = i.successor() { |
|
if !input[i].isWhitespaceOrNewline() { |
|
return (.Header(level: count), headerStart) |
|
} |
|
} |
|
break |
|
} |
|
} |
|
return (.Text, textStart) // a # by itself |
|
} |
|
|
|
func code() -> LineType { |
|
let textStart = i |
|
i = i.successor() |
|
if i < endIndex && input[i] == "`" { |
|
i = i.successor() |
|
if i < endIndex && input[i] == "`" { |
|
i = i.successor() |
|
let j = endIndex.predecessor() // not true for very last line if no newline |
|
if i < j { |
|
return (.Code(language: input.substringWithRange(i ..< j)), endIndex) |
|
} else { |
|
return (.Code(language: ""), endIndex) |
|
} |
|
} |
|
} |
|
return (.Text, textStart) |
|
} |
|
|
|
func unorderedListItem() -> LineType { |
|
let textStart = i |
|
i = i.successor() |
|
if i < endIndex && input[i].isWhitespace() { |
|
return (.ListItem(ordered: false), i) |
|
} |
|
return (.Text, textStart) |
|
} |
|
|
|
func orderedListItem() -> LineType { |
|
let textStart = i |
|
i = i.successor() |
|
if i < endIndex && input[i] == "." { |
|
i = i.successor() |
|
if i < endIndex && input[i].isWhitespace() { |
|
return (.ListItem(ordered: true), i.successor()) |
|
} |
|
} |
|
return (.Text, textStart) |
|
} |
|
|
|
func identifier() -> LineType { |
|
switch input[i] { |
|
case "#": |
|
return header() |
|
case ">": |
|
return (.Quote, i.successor()) |
|
case "`": |
|
return code() |
|
case "-": |
|
return unorderedListItem() |
|
case "1", "2", "3", "4", "5", "6", "7", "8", "9": |
|
return orderedListItem() |
|
default: |
|
return (.Text, i) |
|
} |
|
} |
|
|
|
func whitespace() -> LineType { |
|
var count = 0 |
|
var codeStart = i |
|
for ; i < endIndex; i = i.successor() { |
|
switch input[i] { |
|
case "\n": |
|
break |
|
case " ", "\r": |
|
count += 1 |
|
case "\t": |
|
count += 4 |
|
default: |
|
if count >= 4 { |
|
return (.CodeIndented, codeStart) |
|
} else { |
|
return identifier() |
|
} |
|
} |
|
if count == 4 { |
|
codeStart = i.successor() |
|
} |
|
} |
|
return (.Empty, endIndex) |
|
} |
|
|
|
if i == endIndex || input[i] == "\n" { |
|
return (.Empty, endIndex) |
|
} else { |
|
switch input[i] { |
|
case " ", "\t", "\r": |
|
return whitespace() |
|
default: |
|
return identifier() |
|
} |
|
} |
|
} |
|
} |
|
|
|
// MARK: - Tokenization of Fragments |
|
|
|
private extension Markdown { |
|
/* |
|
* The tokenizer takes in a text fragment and outputs a stream of `Token` objects. |
|
* |
|
* For example, the input: |
|
* aaa **x** |
|
* I've |
|
* |
|
* becomes the following stream of tokens: Text(aaa) Spacing Open(bold) Text(x) |
|
* Close(Bold) Newline Text(I) Escape(') Text(ve) |
|
* |
|
* Multiple spaces get combined into a single token. |
|
*/ |
|
func tokenize(startIndex startIndex: String.Index, endIndex: String.Index) -> [Token] { |
|
var tokens = [Token]() |
|
var i = startIndex // the lookahead character |
|
|
|
func addTextToken(s: String) { |
|
tokens.append(Token.Text(text: s)) |
|
} |
|
|
|
func addEscapeToken(s: String) { |
|
tokens.append(Token.Escape(symbol: s)) |
|
} |
|
|
|
func spacing() { |
|
for ; i < endIndex; i = i.successor() { |
|
if !input[i].isWhitespace() { break } |
|
} |
|
tokens.append(Token.Spacing) |
|
} |
|
|
|
func escapeDash() { |
|
if i < endIndex && input[i] == "-" { |
|
addEscapeToken("--") |
|
i = i.successor() |
|
} else { |
|
addTextToken("-") |
|
} |
|
} |
|
|
|
func escapeEllipsis() { |
|
if i < endIndex && input[i] == "." { |
|
i = i.successor() |
|
if i < endIndex && input[i] == "." { |
|
i = i.successor() |
|
addEscapeToken("...") |
|
} else { |
|
addTextToken("..") |
|
} |
|
} else { |
|
addTextToken(".") |
|
} |
|
} |
|
|
|
func escape() { |
|
let c = input[i] |
|
i = i.successor() |
|
if c == "-" { |
|
escapeDash() |
|
} else if c == "." { |
|
escapeEllipsis() |
|
} else { |
|
addEscapeToken(String(c)) |
|
} |
|
} |
|
|
|
var seenOpen = [String: Bool]() |
|
|
|
func addOpenOrCloseToken(symbol: String, strict: Bool = false) { |
|
// The official rules are that *word* and *word1 word2* will work but |
|
// not * word *, *word *, or * word*. To keep things simple, we only |
|
// require that the opening * is followed by non-whitespace; where the |
|
// closing * is doesn't matter. |
|
if let open = seenOpen[symbol] where open { |
|
seenOpen[symbol] = false |
|
tokens.append(Token.Close(symbol: symbol)) |
|
} else if strict && (i == endIndex || input[i].isWhitespace()) { |
|
addTextToken(symbol) |
|
} else { |
|
seenOpen[symbol] = true |
|
tokens.append(Token.Open(symbol: symbol)) |
|
} |
|
} |
|
|
|
func strikethrough() { |
|
if i < endIndex && input[i] == "~" { |
|
i = i.successor() |
|
addOpenOrCloseToken("~~") |
|
} else { |
|
addTextToken("~") |
|
} |
|
} |
|
|
|
// Note that italics, bold, etc, do no work across multiple lines. |
|
// That happens because we tokenize each fragment individually and we reset |
|
// the tokenization state with each new fragment. |
|
func italicsOrBold() { |
|
if i < endIndex && input[i] == "*" { |
|
i = i.successor() |
|
addOpenOrCloseToken("**", strict: true) |
|
} else { |
|
addOpenOrCloseToken("*", strict: true) |
|
} |
|
} |
|
|
|
func span() { |
|
let c = input[i] |
|
i = i.successor() |
|
if c == "*" { |
|
italicsOrBold() |
|
} else if c == "~" { |
|
strikethrough() |
|
} else { |
|
addOpenOrCloseToken(String(c)) |
|
} |
|
} |
|
|
|
func backslash() { |
|
i = i.successor() |
|
if i < endIndex { |
|
let c = input[i] |
|
if c == "*" || c == "[" || c == "]" { |
|
addTextToken(String(c)) |
|
i = i.successor() |
|
return |
|
} |
|
} |
|
addTextToken("\\") |
|
} |
|
|
|
func parseLink() -> ((String.Index, String.Index), (String.Index, String.Index))? { |
|
let textStart = i.successor() |
|
|
|
// Loop until we find ] followed by ( followed by ). It's not a real link |
|
// unless it has text and a valid URL, but we're not that picky. |
|
// To be honest, this is where a regexp is the simpler solution. ;-) |
|
for ; i < endIndex; i = i.successor() { |
|
if input[i] == "]" { |
|
let textEnd = i |
|
i = i.successor() |
|
if i < endIndex && input[i] == "(" { |
|
i = i.successor() |
|
let urlStart = i |
|
for ; i < endIndex; i = i.successor() { |
|
if input[i] == ")" { |
|
let urlEnd = i |
|
i = i.successor() |
|
return ((eatLeadingWhitespace(textStart, textEnd), |
|
eatTrailingWhitespace(textStart, textEnd)), |
|
(eatLeadingWhitespace(urlStart, urlEnd), |
|
eatTrailingWhitespace(urlStart, urlEnd))) |
|
} |
|
} |
|
} |
|
} |
|
} |
|
|
|
// This does not appear to be a validly formatted link. |
|
addTextToken("[") |
|
i = textStart |
|
return nil |
|
} |
|
|
|
func link() { |
|
if let ((textStart, textEnd), (urlStart, urlEnd)) = parseLink() { |
|
let tokenized = tokenize(startIndex: textStart, endIndex: textEnd) |
|
let fragment = Fragment.Text(tokens: tokenized) |
|
let url = input.substringWithRange(urlStart ..< urlEnd) |
|
let token = Token.Link(fragment: fragment, url: url) |
|
tokens.append(token) |
|
} |
|
} |
|
|
|
func image() { |
|
i = i.successor() |
|
if i < endIndex && input[i] == "[" { |
|
if let ((captionStart, captionEnd), (urlStart, urlEnd)) = parseLink() { |
|
let caption = input.substringWithRange(captionStart ..< captionEnd) |
|
let url = input.substringWithRange(urlStart ..< urlEnd) |
|
let token = Token.Image(caption: caption, url: url) |
|
tokens.append(token) |
|
} |
|
} else { |
|
addTextToken("!") |
|
} |
|
} |
|
|
|
func endsWord(c: Character) -> Bool { |
|
return c == " " || c == "\t" || c == "\n" || c == "\r" || // whitespace |
|
c == "'" || c == "<" || c == ">" || c == "&" || c == "-" || c == "." || // escapes |
|
c == "`" || c == "\"" || c == "*" || c == "~" || // span |
|
c == "\\" || c == "[" || c == "!" |
|
} |
|
|
|
func word() { |
|
let wordStart = i |
|
for ; i < endIndex; i = i.successor() { |
|
if endsWord(input[i]) { break } |
|
} |
|
addTextToken(input.substringWithRange(wordStart ..< i)) |
|
} |
|
|
|
while i < endIndex { |
|
switch input[i] { |
|
case " ", "\t", "\r": |
|
spacing() |
|
case "'", "<", ">", "&", "-", ".": |
|
escape() |
|
case "`", "\"", "*", "~": |
|
span() |
|
case "\\": |
|
backslash() |
|
case "[": |
|
link() |
|
case "!": |
|
image() |
|
case "\n": |
|
fatalError("fragments should not contain newlines") |
|
default: |
|
word() |
|
} |
|
} |
|
return tokens |
|
} |
|
} |
|
|
|
// MARK: - HTML Rendering |
|
|
|
private let escapeTable = [ |
|
"'": "’", |
|
"<": "<", |
|
">": ">", |
|
"&": "&", |
|
"-": "–", |
|
"--": "—", |
|
"...": "…", |
|
] |
|
|
|
private let openTable = [ |
|
"`": "<code>", |
|
"\"": "“", |
|
"*": "<em>", |
|
"**": "<strong>", |
|
"~~": "<del>", |
|
] |
|
|
|
private let closeTable = [ |
|
"`": "</code>", |
|
"\"": "”", |
|
"*": "</em>", |
|
"**": "</strong>", |
|
"~~": "</del>", |
|
] |
|
|
|
private extension Markdown.Token { |
|
func toHTML() -> String { |
|
switch self { |
|
case .Spacing: return " " |
|
case .Text(let text): return text |
|
case .Escape(let symbol): return escapeTable[symbol]! |
|
case .Open(let symbol): return openTable[symbol]! |
|
case .Close(let symbol): return closeTable[symbol]! |
|
case .Link(let fragment, let url): |
|
return "<a href=\"\(url.escape())\">" + fragment.toHTML() + "</a>" |
|
case .Image(let caption, let url): |
|
return "<img src=\"\(url.escape())\" alt=\"\(caption.escape())\">" |
|
} |
|
} |
|
} |
|
|
|
private extension Markdown.Fragment { |
|
func toHTML() -> String { |
|
switch self { |
|
case .Text(let tokens): |
|
var s = "" |
|
for token in tokens { |
|
s += token.toHTML() |
|
} |
|
return s |
|
case .Code(let string): |
|
return string.escape() |
|
} |
|
} |
|
|
|
// If the text block contains only one image token, then we turn it into |
|
// a <figure> instead of a <p> paragraph. |
|
func isImageOnly() -> Bool { |
|
if case .Text(let tokens) = self where tokens.count == 1, case .Image = tokens[0] { |
|
return true |
|
} else { |
|
return false |
|
} |
|
} |
|
} |
|
|
|
private extension Markdown.Block { |
|
func isImageOnly() -> Bool { |
|
return fragments.count == 1 && fragments.first!.isImageOnly() |
|
} |
|
|
|
func formatFragments(separator: String = "<br>\n") -> String { |
|
precondition(fragments.count > 0) |
|
if fragments.count == 1 { |
|
return fragments.first!.toHTML() |
|
} else { |
|
return separator.join(fragments.map { $0.toHTML() }) |
|
} |
|
} |
|
|
|
func toHTML() -> String { |
|
switch type { |
|
case .Empty: |
|
fatalError("should not happen") |
|
|
|
case .Header(let level): |
|
return "<h\(level)>" + formatFragments() + "</h\(level)>\n\n" |
|
|
|
case .Text: |
|
if isImageOnly() { |
|
return "<figure class=\"image\">" + formatFragments() + "</figure>\n\n" |
|
} else { |
|
return "<p>" + formatFragments("\n") + "</p>\n\n" |
|
} |
|
|
|
case .Quote: |
|
return "<p>" + formatFragments() + "</p>\n" |
|
|
|
case .Code(let language): |
|
var s = "<figure class=\"code\"><pre><code" |
|
if !language.isEmpty { |
|
s += " class=\"\(language.escape())\"" |
|
} |
|
s += ">" + formatFragments() + "</code></pre></figure>\n\n" |
|
return s |
|
|
|
case .CodeIndented: |
|
return "<figure class=\"code\"><pre><code>" + formatFragments("\n") + "</code></pre></figure>\n\n" |
|
|
|
case .ListItem: |
|
return "<li>" + formatFragments() + "</li>\n" |
|
} |
|
} |
|
} |
|
|
|
extension Markdown { |
|
func renderHTML() -> String { |
|
var s = "" |
|
var lastType = BlockType.Empty |
|
var wasOrderedList = false |
|
|
|
for block in blocks { |
|
if !lastType.isQuote() && block.type.isQuote() { |
|
s += "<blockquote>\n" |
|
} else if lastType.isQuote() && !block.type.isQuote() { |
|
s += "</blockquote>\n\n" |
|
} |
|
|
|
if !lastType.isListItem() && block.type.isListItem() { |
|
wasOrderedList = block.type.isOrderedList() |
|
s += wasOrderedList ? "<ol>\n" : "<ul>\n" |
|
} else if lastType.isListItem() && !block.type.isListItem() { |
|
s += wasOrderedList ? "</ol>\n\n" : "</ul>\n\n" |
|
} |
|
|
|
s += block.toHTML() |
|
lastType = block.type |
|
} |
|
return s |
|
} |
|
} |