-
-
Save kballenegger/29dabe4b6e762ee221df to your computer and use it in GitHub Desktop.
Macchiato Markdown Parsing Engine
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*** Definition section ***/
%{
/* C code copied verbatim into the generated scanner. */
#include <stdio.h>
#include <stdlib.h>
#include "markdown.lexer.h"

/*
 * TOKEN(type): emit the current match (yytext / yyleng) as a token of `type`.
 * It first switches the scanner back to the `neutral` start condition and
 * flushes any pending run of accumulated text, so tokens are reported in
 * input order. Wrapped in do { } while (0) so the macro behaves as a single
 * statement wherever it is used (e.g. under an unbraced `if`).
 */
#define TOKEN(type) \
    do { \
        BEGIN(neutral); \
        concat_previous_text_token(); \
        record_token(yytext, yyleng, type); \
    } while (0)

/* The pending-text buffer is allocated and grown in steps of this many bytes. */
#define T_TEXT_BUFFER_SIZE_INCREMENT 1024
/* When 0, update_multibytes_encountered() returns 0 and ranges stay in raw bytes. */
#define MULTIBYTE_RANGE_ADJUSTING_ENABLED 1

void concat_previous_text_token(void);
void append_to_text_token_buffer(char *text, unsigned long len, enum TokenType type);
int update_multibytes_encountered(char *text, unsigned long len);
void record_token(char *text, unsigned long length, enum TokenType type);

// global state
NSUInteger token_index = 0;                  // bytes of input already reported as tokens
TokenLexedBlock token_callback_block = nil;  // receives every token of the current lex() run
static dispatch_queue_t lexer_queue;         // queue on which lex() runs the scanner
static dispatch_queue_t calling_queue;       // queue on which token callbacks are delivered
int multibytes_encountered = 0;              // running adjustment subtracted from byte offsets when building ranges

// concatenation of text tokens
char *last_text_token = NULL;                          // pending text run (NUL-terminated)
int last_text_token_len = 0;                           // bytes used in last_text_token, excluding the NUL
int last_text_token_buffer_size = 0;                   // bytes allocated for last_text_token
enum TokenType last_text_token_token_type = t_text;    // token type of the pending run
%}
/*** Regex definition section ***/ | |
/* Named patterns referenced as {name} by the rules further down. */
/* em: a single emphasis marker; the rules repeat it ({em}{2}, {em}{3}) for strong forms. */
em "*"|"_" | |
/* rule: a horizontal-rule line made of ---, *** or ===.
   NOTE(review): the ^ / $ anchors live inside a named definition — confirm flex applies
   them to the whole alternation rather than only to the first / last alternative. */
rule ^"---"|"***"|"==="$ | |
/* symbol_bullet: up to three leading spaces, one bullet glyph, then a space or tab. */
symbol_bullet " "{0,3}("•"|"-"|"*"|"✓"|"✗")(" "|\t) | |
/* number_bullet: up to three leading spaces, digits followed by "." or ")", then a space or tab. */
number_bullet " "{0,3}([0-9]+("."|")"))(" "|\t) | |
/* bullet: either kind of list marker. */
bullet {symbol_bullet}|{number_bullet} | |
/* indent: one tab or exactly four spaces. */
indent \t|" "{4} | |
/* punctuation: single punctuation characters reported as t_punctuation. "!", ":", "(" and
   "[" also have dedicated rules that are listed before the {punctuation} rule. */
punctuation "."|","|"?"|"!"|"-"|"–"|"—"|"/"|"\\"|"'"|"\""|":"|";"|"("|")"|"["|"]"|"|" | |
/* This tells flex to read only one input file */ | |
%option noyywrap | |
/* states */ | |
/* All four are declared with %s (inclusive start conditions).
   neutral      - entered by every TOKEN() before the token is recorded
   after_bullet - entered after a bullet token; no rule in this file is restricted to it
   in_paren     - entered after "(", left on ")" or on any TOKEN()
   in_bracket   - entered after "[", left on "]" or on any TOKEN() */
%s neutral | |
%s after_bullet | |
%s in_paren | |
%s in_bracket | |
%% | |
/*** Rules section ***/ | |
    /* Inside "(...)": ")" closes the group; any other single character (newline excluded,
       since "." does not match it) is accumulated into a pending t_paren_text run. */
    /* NOTE(review): in_paren / in_bracket are inclusive (%s) states, so the unprefixed rules
       below presumably stay active inside them and a longer match (e.g. "**" or "__") would
       win over these one-character rules and drop back to neutral — confirm this is intended. */
<in_paren>")" { TOKEN(t_close_paren); } | |
<in_paren>. { append_to_text_token_buffer(yytext, yyleng, t_paren_text); BEGIN(in_paren); } | |
    /* Inside "[...]": same scheme, producing t_bracket_text. */
<in_bracket>"]" { TOKEN(t_close_bracket); } | |
<in_bracket>. { append_to_text_token_buffer(yytext, yyleng, t_bracket_text); BEGIN(in_bracket); } | |
    /* Block-level markers. TOKEN() switches to neutral, which is why the bullet rule
       calls BEGIN(after_bullet) after it rather than before. */
{rule} { TOKEN(t_rule); } | |
^{bullet} { TOKEN(t_bullet); BEGIN(after_bullet); } | |
    /* Emphasis markers: three, two or one of "*" / "_". */
{em}{3} { TOKEN(t_strong_emphasis); } | |
{em}{2} { TOKEN(t_strong); } | |
{em} { TOKEN(t_emphasis); } | |
: { TOKEN(t_colon); } | |
    /* Code markers: fenced block delimiter and inline code delimiter. */
``` { TOKEN(t_triple_backtick); } | |
` { TOKEN(t_backtick); } | |
    /* Line-start markers: code indent and blockquote. */
^{indent} { TOKEN(t_indent); } | |
^">" { TOKEN(t_quotemark); } | |
"!" { TOKEN(t_bang); } | |
    /* Openers: record the token, then enter the matching state (TOKEN() reset it to neutral). */
"(" { TOKEN(t_open_paren); BEGIN(in_paren); } | |
"[" { TOKEN(t_open_bracket); BEGIN(in_bracket); } | |
    /* Setext-style underlines: a whole line of "-" or "=". */
^"-"+$ { TOKEN(t_headline_underline); } | |
^"="+$ { TOKEN(t_headline_double_underline); } | |
    /* ATX-style headline marks at the start of a line, and closing marks at the end of one. */
^"#"{1,6} { TOKEN(t_headline_mark); } | |
"#"{1,6}$ { TOKEN(t_headline_mark); } | |
    /* A space directly before the newline is reported together with it as a hard newline. */
" \n" { TOKEN(t_hard_newline); } | |
"\n" { TOKEN(t_newline); } | |
" " { TOKEN(t_space); } | |
{punctuation} { TOKEN(t_punctuation); } | |
    /* Fallback: any other single character joins the pending t_text run, which is emitted
       as one token the next time TOKEN() or a text run of another type flushes it. */
. { append_to_text_token_buffer(yytext, yyleng, t_text); } | |
%% | |
/*** C Code section ***/ | |
void append_to_text_token_buffer(char *text, unsigned long len, enum TokenType type) { | |
// if the type is different, commit the token | |
if (type != last_text_token_token_type) concat_previous_text_token(); | |
// reallocate memory if necessary | |
if ((last_text_token_len + len) >= last_text_token_buffer_size) { // >= to make sure there's always at least 1 byte padding for null termination | |
last_text_token_buffer_size += T_TEXT_BUFFER_SIZE_INCREMENT; | |
last_text_token = realloc(last_text_token, last_text_token_buffer_size); | |
} | |
// *(last_text_token + last_text_token_len) = *text; // write data | |
for (unsigned int i = 0; i <= len; i++) { | |
*(last_text_token + last_text_token_len + i) = *(text+i); | |
} | |
last_text_token_len += len; | |
// ensure type | |
last_text_token_token_type = type; | |
} | |
/*
 * Reports one token to the callback of the current lex() run.
 *
 * text:   NUL-terminated token text.
 * length: length of the token in bytes.
 * type:   token type.
 *
 * The reported range starts at the token's byte offset minus the multibyte
 * adjustment accumulated by earlier tokens; its length is the byte length
 * minus this token's own adjustment.
 */
void record_token(char *text, unsigned long length, enum TokenType type) {
    // The location must use the adjustment accumulated BEFORE this token, so read the
    // counter first and only then let update_multibytes_encountered() bump it. Doing both
    // inside one argument list would leave the evaluation order unspecified.
    NSUInteger location = token_index - multibytes_encountered;
    NSUInteger adjusted_length = length - update_multibytes_encountered(text, length);
    NSRange range = NSMakeRange(location, adjusted_length);

    if (token_callback_block) { // calling a nil block would crash
        // NOTE(review): calling_queue is the queue lex() was called from, and lex() is still
        // blocked on it while this runs — presumably this only works when that queue is
        // concurrent; confirm callers never lex from a serial queue.
        dispatch_sync(calling_queue, ^{
            token_callback_block(type, text, range);
        });
    }
    token_index += length;
}
int update_multibytes_encountered(char *text, unsigned long len) { | |
if (!MULTIBYTE_RANGE_ADJUSTING_ENABLED) return 0; | |
int count = 0; | |
for (int i = 0; i < len; i++) { | |
if ((*(text+i) & 0b10000000) == 0b10000000 && // check if it's a multi-char byte | |
(~*(text+i) & 0b01000000) == 0b01000000) { // and check that it's not a leading byte | |
count++; | |
} | |
} | |
multibytes_encountered += count; | |
return count; | |
} | |
void concat_previous_text_token() { | |
if (last_text_token_len==0) return; | |
record_token(last_text_token, last_text_token_len, last_text_token_token_type); | |
// clean up | |
free(last_text_token); | |
last_text_token = calloc(T_TEXT_BUFFER_SIZE_INCREMENT, 1); | |
last_text_token_len = 0; | |
last_text_token_token_type = t_text; | |
last_text_token_buffer_size = T_TEXT_BUFFER_SIZE_INCREMENT; | |
} | |
/*
 * Lexes the NUL-terminated string `data` and calls `block` once per token, in
 * input order. The call is synchronous: every token has been delivered by the
 * time lex() returns. Scanning is serialised on a private queue because the
 * scanner keeps its state in globals.
 *
 * data:  text to lex; NULL is treated as "nothing to lex".
 * block: callback receiving each token's type, text and range.
 */
void lex(const char *data, TokenLexedBlock block) {
    if (data == NULL) return; // yy_scan_string() would dereference it

    // create global queue
    static dispatch_once_t lexer_queue_creation;
    dispatch_once(&lexer_queue_creation, ^{
        lexer_queue = dispatch_queue_create("com.azuretalon.Macchiato.lexer_queue", NULL);
    });

    // NOTE(review): dispatch_get_current_queue() is deprecated; it is kept because
    // record_token() delivers callbacks on calling_queue.
    dispatch_queue_t current_calling_queue = dispatch_get_current_queue();

    dispatch_sync(lexer_queue, ^{
        // reset global state
        token_callback_block = block;
        token_index = 0;
        multibytes_encountered = 0;
        calling_queue = current_calling_queue; // capture from scope
        last_text_token = calloc(T_TEXT_BUFFER_SIZE_INCREMENT, 1); // allocate a kilobyte, zero it out
        last_text_token_len = 0;
        last_text_token_token_type = t_text;
        last_text_token_buffer_size = T_TEXT_BUFFER_SIZE_INCREMENT;

        // lex
        YY_BUFFER_STATE buf = yy_scan_string(data);
        // The start condition survives between yylex() runs; without this reset, input that
        // ended inside "(" or "[" would make the next run begin in in_paren / in_bracket.
        BEGIN(INITIAL);
        yylex();
        concat_previous_text_token(); // grab any remaining text
        yy_delete_buffer(buf);

        // free
        free(last_text_token);
        last_text_token = NULL;
        last_text_token_len = 0;
        last_text_token_buffer_size = 0;
    });
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// | |
// MarkdownGrammar.m | |
// Macchiato | |
// | |
// Created by Kenneth Ballenegger on 8/2/12. | |
// Copyright (c) 2012 Azure Talon. All rights reserved. | |
// | |
#import "MarkdownGrammar.h" | |
#import "markdown.lexer.h" | |
#define DEBUGGING_GRAMMAR 0 | |
// TODO: comment with example input and output | |
/**
 * Groups a flat list of lexer tokens into Markdown syntax elements.
 *
 * Input: an array of token dictionaries. This function reads the keys @"TokenType"
 * (number holding an enum TokenType), @"Range" (NSValue wrapping an NSRange) and @"Text".
 *
 * Output: an array of mutable dictionaries in the order they were recognised, each with
 *   @"Range"             - NSValue range covering the whole element
 *   @"SyntaxElementType" - NSNumber holding an enum SyntaxElementType
 * plus, depending on the element, @"MakupTokenRanges" (ranges of the markup tokens
 * themselves), @"Name" and/or @"URL".
 *
 * Example (token types only):
 *   t_emphasis t_text t_emphasis t_newline
 * yields one s_emphasis element bridging the first three tokens, with both t_emphasis
 * ranges listed under @"MakupTokenRanges".
 *
 * Tokens that do not start a valid element are skipped; they produce no output.
 */
NSArray * MarkdownSyntaxElementsFromTokens(NSArray *tokens) {
    NSMutableArray *syntaxElements = [NSMutableArray array];
    NSUInteger count = [tokens count];

    // Declares <var>_token and <var>_type for the token at `index`. An out-of-range index
    // yields nil / 0, so a type of 0 means "before the first or after the last token".
#define PREPARE_TOKEN(index, var) \
    NSDictionary *var##_token; enum TokenType var##_type; \
    if ((index) >= 0 && (index) < count) { \
        var##_token = [tokens objectAtIndex:(index)]; \
        var##_type = [var##_token[@"TokenType"] intValue]; \
    } else { \
        var##_token = nil; \
        var##_type = 0; \
    } // END OF MACRO

    // Declares <var>_range from <var>_token; it stays {0, 0} when there is no such token.
#define PREPARE_RANGE_FROM_TOKEN(var) \
    NSRange var##_range = NSMakeRange(0, 0); \
    if (var##_token) \
        var##_range = [var##_token[@"Range"] rangeValue]; // END OF MACRO

    // Declares `toRange`: the range of token <var>, or of the token before it when <var>
    // is past the end of the input.
#define PREPARE_TO_RANGE_FROM_TOKEN_OR_PREVIOUS(var) \
    NSRange toRange; \
    if (var##_type) { \
        toRange = [var##_token[@"Range"] rangeValue]; \
    } else { \
        PREPARE_TOKEN(var-1, toRange_##var##_m1); \
        toRange = [toRange_##var##_m1_token[@"Range"] rangeValue]; \
    } // END OF MACRO

    // Range from the start of `first` to the end of `second`.
#define BRIDGE_RANGES(first, second) \
    NSMakeRange(first.location, \
                second.location + second.length - first.location) // END OF MACRO

    // Marks token j as consumed: the main loop resumes at j+1 (the loop's i++ still runs).
#define CONTINUE_AT(j) \
    i = (j); \
    goto next_syntax_element; // END OF MACRO

    // Appends one syntax element; `extra` may be nil or a dictionary of additional keys.
#define COMMIT_ELEMENT(range, type, extra) \
    { /* guard scope for var declaration */ \
        NSMutableDictionary *metaDict = \
            [NSMutableDictionary dictionaryWithDictionary: \
                @{@"Range": [NSValue valueWithRange:range], \
                  @"SyntaxElementType": [NSNumber numberWithInt:type]}]; \
        if (extra) [metaDict addEntriesFromDictionary:extra]; \
        [syntaxElements addObject:metaDict]; \
        if (DEBUGGING_GRAMMAR) NSLog(@"found token type %d", type); \
    } // END OF MACRO

    // Inline pair: an opening token of t_type matched by a later token of the same type on
    // the same line, where the closing token is followed by space, newline or punctuation.
#define CASE_SIMPLE_PAIR_FOR_TYPE(t_type, s_type) \
    case t_type: { \
        for (int j = i+1; j < count; j++) { \
            PREPARE_TOKEN(j, j); \
            if (j_type == t_type) { \
                /* next must be space, newline or punctuation */ \
                PREPARE_TOKEN(j+1, j_p1); \
                if (!(j_p1_type == t_space || \
                      j_p1_type == t_newline || \
                      j_p1_type == t_punctuation)) continue; \
                PREPARE_RANGE_FROM_TOKEN(j); \
                NSRange commitRange = BRIDGE_RANGES(i_range, j_range); \
                NSDictionary *metaDictionary = \
                    @{@"MakupTokenRanges": @[ \
                        [NSValue valueWithRange:i_range], \
                        [NSValue valueWithRange:j_range]]}; \
                COMMIT_ELEMENT(commitRange, s_type, metaDictionary); \
                CONTINUE_AT(j); \
            } else if (j_type == t_newline) { \
                /* invalid, fallback to text */ \
                break; \
            } \
        } \
    } break; // END OF MACRO

    // Leaves the current switch case unless token i is preceded by two newlines
    // (or by the start of the input).
#define CHECK_ENTERS_AS_PARAGRAPH \
    PREPARE_TOKEN(i-1, i_m1); \
    PREPARE_TOKEN(i-2, i_m2); \
    if (!((i_m1_type == t_newline || i_m1_type == 0) && \
          (i_m2_type == t_newline || i_m2_type == 0))) break; // END OF MACRO

#pragma mark -
    // LOOP STARTS HERE
    // for list items, we need to keep track of the last token that was part of a list item
    int lastCommittedListItemToken = -1;
    for (int i = 0; i < count; i++) {
        PREPARE_TOKEN(i, i);
        PREPARE_RANGE_FROM_TOKEN(i);
        if (DEBUGGING_GRAMMAR) NSLog(@"big loop iteration %d", i);

        // figure out which opening token it is
        switch (i_type) {
#pragma mark inline code
            CASE_SIMPLE_PAIR_FOR_TYPE(t_backtick, s_code);
#pragma mark emphasis
            CASE_SIMPLE_PAIR_FOR_TYPE(t_emphasis, s_emphasis);
#pragma mark strong
            CASE_SIMPLE_PAIR_FOR_TYPE(t_strong, s_strong);
#pragma mark strong emphasis
            CASE_SIMPLE_PAIR_FOR_TYPE(t_strong_emphasis, s_strong_emphasis);

#pragma mark code block
            // code block
            case t_indent: {
                // a code block must be:
                // - preceded by two newlines (or nothingness)
                // - followed by two newline (or nothingness)
                CHECK_ENTERS_AS_PARAGRAPH;
                // we go to count+1 so we can catch a code block at the very end of the string
                for (int j = i+1; j < count+1; j++) {
                    PREPARE_TOKEN(j, j);
                    // looking for the newline, still part of code block
                    if (!(j_type == t_newline || j_type == 0)) continue;
                    // look ahead to check if next token is also newline
                    PREPARE_TOKEN(j+1, j_p1);
                    if (j_p1_type == t_newline || j_p1_type == 0) {
                        PREPARE_TO_RANGE_FROM_TOKEN_OR_PREVIOUS(j);
                        NSRange commitRange = BRIDGE_RANGES(i_range, toRange);
                        COMMIT_ELEMENT(commitRange, s_code_block, nil);
                        CONTINUE_AT(j);
                    } else if (j_p1_type != t_indent) {
                        break; // a newline followed by something that isn't an indent means invalid code block
                    }
                }
            } break;

#pragma mark blockquote
            case t_quotemark: {
                // a quotation block must be:
                // - preceded by two newlines (or nothingness)
                // - followed by two newline (or nothingness)
                CHECK_ENTERS_AS_PARAGRAPH;
                NSMutableArray *markupTokenRanges = [NSMutableArray arrayWithObject:i_token[@"Range"]];
                // we go to count+1 so we can catch a blockquote at the very end of the string
                for (int j = i+1; j < count+1; j++) {
                    PREPARE_TOKEN(j, j);
                    if (j_type == t_quotemark) [markupTokenRanges addObject:j_token[@"Range"]];
                    // looking for the newline, still part of blockquote
                    if (!(j_type == t_newline || j_type == 0)) continue;
                    // look ahead to check if next token is also newline
                    PREPARE_TOKEN(j+1, j_p1);
                    if (j_p1_type == t_newline || j_p1_type == 0) {
                        PREPARE_TO_RANGE_FROM_TOKEN_OR_PREVIOUS(j);
                        NSRange commitRange = BRIDGE_RANGES(i_range, toRange);
                        COMMIT_ELEMENT(commitRange, s_blockquote, @{@"MakupTokenRanges": markupTokenRanges});
                        CONTINUE_AT(j);
                    } else if (j_p1_type != t_quotemark) {
                        break; // a newline followed by something that isn't a quotemark means invalid blockquote
                    }
                }
            } break;

#pragma mark headlines
            case t_headline_mark: {
                // a regular headline must be:
                // - preceded by two newlines (or nothingness)
                // - followed by two newline (or nothingness)
                NSMutableArray *markupTokenRanges = [NSMutableArray arrayWithObject:i_token[@"Range"]];
                CHECK_ENTERS_AS_PARAGRAPH;
                // we go to count+1 so we can catch a headline at the very end of the string
                for (int j = i+1; j < count+1; j++) {
                    PREPARE_TOKEN(j, j);
                    // looking for the newline that ends the headline line
                    if (!(j_type == t_newline || j_type == 0)) continue;
                    // look ahead to check if next token is also newline
                    PREPARE_TOKEN(j+1, j_p1);
                    if ((j_p1_type == t_newline || j_p1_type == 0)) {
                        // a trailing run of "#" right before the newline is markup as well
                        PREPARE_TOKEN(j-1, j_m1);
                        if (j_m1_type == t_headline_mark)
                            [markupTokenRanges addObject:j_m1_token[@"Range"]];
                        PREPARE_TO_RANGE_FROM_TOKEN_OR_PREVIOUS(j);
                        NSRange commitRange = BRIDGE_RANGES(i_range, toRange);
                        // the number of "#" characters (clamped to 1...6) selects the headline level
                        enum SyntaxElementType commitType = (enum SyntaxElementType)((int)s_headline_1 + (MAX(1, MIN(6, [i_token[@"Text"] length])) - 1));
                        COMMIT_ELEMENT(commitRange, commitType, @{@"MakupTokenRanges": markupTokenRanges});
                        CONTINUE_AT(j);
                    } else {
                        break; // this means a multi-line headline, which is not allowed
                    }
                }
            } break;

#pragma mark github code block
            case t_triple_backtick: {
                // a github-style code block is a paragraph
                CHECK_ENTERS_AS_PARAGRAPH;
                for (int j = i+1; j < count; j++) {
                    PREPARE_TOKEN(j, j);
                    // iterate until we find the matching triple backtick
                    if (j_type != t_triple_backtick) continue;
                    // look ahead to check if next two tokens are newlines
                    PREPARE_TOKEN(j+1, j_p1);
                    PREPARE_TOKEN(j+2, j_p2);
                    if ((j_p1_type == t_newline || j_p1_type == 0) &&
                        (j_p2_type == t_newline || j_p2_type == 0)) {
                        // the macro below needs an index variable named like the token prefix
                        int j_p1 = j+1;
                        PREPARE_TO_RANGE_FROM_TOKEN_OR_PREVIOUS(j_p1);
                        NSRange commitRange = BRIDGE_RANGES(i_range, toRange);
                        NSDictionary *metaDictionary = @{@"MakupTokenRanges": @[i_token[@"Range"], j_token[@"Range"]]};
                        COMMIT_ELEMENT(commitRange, s_code_block, metaDictionary);
                        CONTINUE_AT(j+1);
                    }
                }
            } break;

#pragma mark underlined headline
            case t_headline_underline: case t_headline_double_underline: {
                // previous token MUST be newline
                PREPARE_TOKEN(i-1, i_m1);
                if (i_m1_type != t_newline) break;
                // if no preceding newline found, assume zero -- this deals with headline at start of file
                int startIndex = 0;
                // loop backwards to find the actual headline
                for (int j = i-2; j >= 0; j--) {
                    PREPARE_TOKEN(j, j);
                    // i'm looking for a preceding newline to find beginning of previous line
                    if (j_type == t_newline) {
                        startIndex = j+1;
                        break;
                    }
                }
                // verify that it starts as paragraph, or on bounds
                PREPARE_TOKEN(startIndex-1, s_m1);
                if (!(s_m1_type == t_newline || s_m1_type == 0)) break;
                PREPARE_TOKEN(startIndex-2, s_m2);
                if (!(s_m2_type == t_newline || s_m2_type == 0)) break;
                // verify that it's followed by paragraph end
                PREPARE_TOKEN(i+1, i_p1);
                if (!(i_p1_type == t_newline || i_p1_type == 0)) break;
                PREPARE_TOKEN(i+2, i_p2);
                if (!(i_p2_type == t_newline || i_p2_type == 0)) break;
                // if we got to here without breaking, we're a valid match
                PREPARE_TOKEN(startIndex, start);
                NSRange start_range = [start_token[@"Range"] rangeValue];
                // extend through the trailing newline when there is one; at the end of the
                // input there is no following token, so the element ends with the underline
                NSRange end_range = i_p1_token ? [i_p1_token[@"Range"] rangeValue] : i_range;
                NSRange commitRange = BRIDGE_RANGES(start_range, end_range);
                enum SyntaxElementType commitType;
                if (i_type == t_headline_double_underline)
                    commitType = s_headline_1;
                else
                    commitType = s_headline_2;
                COMMIT_ELEMENT(commitRange, commitType, @{@"MakupTokenRanges": @[i_token[@"Range"]]});
                CONTINUE_AT(i+1); // eat newline
            } break;

#pragma mark rule
            case t_rule: {
                CHECK_ENTERS_AS_PARAGRAPH;
                PREPARE_TOKEN(i+1, i_p1);
                PREPARE_TOKEN(i+2, i_p2);
                if ((i_p1_type == t_newline || i_p1_type == 0) &&
                    (i_p2_type == t_newline || i_p2_type == 0)) {
                    COMMIT_ELEMENT(i_range, s_rule, nil);
                    CONTINUE_AT(i+1);
                }
            } break;

#pragma mark links & images
            case t_open_bracket: {
                // seek end of link text
                int linkTextEnd = -1;
                for (int j = i+1; j < count; j++) {
                    PREPARE_TOKEN(j, j);
                    if (j_type == t_close_bracket) {
                        // must be followed by [, (, or :, else invalid
                        PREPARE_TOKEN(j+1, j_p1);
                        if (!(j_p1_type == t_open_bracket ||
                              j_p1_type == t_open_paren ||
                              j_p1_type == t_colon)) break;
                        // found!
                        linkTextEnd = j; break;
                    } else if (j_type == t_newline) {
                        // invalid, fallback to text
                        break;
                    }
                }
                if (linkTextEnd < 0) break; // end not found, this is not a link

                // now that we have end of link text, find rest of link
                PREPARE_TOKEN(linkTextEnd+1, lte_p1); // prepare "link target open" token, [ or (
                // start iterating after the lto
                if (lte_p1_type == t_colon) {
                    // we've got a link definition
                    // now we've gotta check how the grammar element started, definitions need to be one per line
                    PREPARE_TOKEN(i-1, i_m1);
                    // also, t_colon needs to be followed by a space
                    PREPARE_TOKEN(linkTextEnd+2, lte_p2);
                    if (!(i_m1_type == t_newline || i_m1_type == 0) ||
                        lte_p2_type != t_space) {
                        // fail
                        CONTINUE_AT(i+1);
                    }
                    // we go to count+1 so a definition on the last line, without a trailing
                    // newline, is still found (j == count yields type 0)
                    for (int j = linkTextEnd+3; j < count+1; j++) {
                        PREPARE_TOKEN(j, j);
                        // looking for newline or end of file
                        if (j_type == t_newline || j_type == 0) {
                            // we're good!
                            PREPARE_TO_RANGE_FROM_TOKEN_OR_PREVIOUS(j);
                            NSRange commitRange = BRIDGE_RANGES(i_range, toRange);
                            // name: text of every token between "[" and "]"
                            NSMutableString *name = [[NSMutableString alloc] init];
                            for (int k = i+1; k <= linkTextEnd-1; k++) {
                                PREPARE_TOKEN(k, k);
                                [name appendString:k_token[@"Text"]];
                            }
                            // url: text of every token after ": " up to the end of the line
                            NSMutableString *url = [[NSMutableString alloc] init];
                            for (int k = linkTextEnd+3; k <= j-1; k++) {
                                PREPARE_TOKEN(k, k);
                                [url appendString:k_token[@"Text"]];
                            }
                            NSDictionary *metaDictionary = @{
                                @"Name": [NSString stringWithString:name],
                                @"URL": [NSString stringWithString:url]
                            };
                            COMMIT_ELEMENT(commitRange, s_link_info, metaDictionary);
                            CONTINUE_AT(j);
                        }
                    }
                } else if (lte_p1_type == t_open_bracket || lte_p1_type == t_open_paren) {
                    // we've got an inline link
                    for (int j = linkTextEnd+2; j < count; j++) {
                        PREPARE_TOKEN(j, j);
                        if ((lte_p1_type == t_open_bracket && j_type == t_close_bracket) ||
                            (lte_p1_type == t_open_paren && j_type == t_close_paren)) {
                            // found the closing ] or )
                            // [text][name] refers to a named link, [text](url) carries the URL itself
                            NSString *nameOrURLKey = (lte_p1_type == t_open_bracket ? @"Name" : @"URL");
                            // actually, it may be an image if it's preceded by a bang!
                            PREPARE_TOKEN(i-1, i_m1);
                            BOOL isImage = (i_m1_type == t_bang && lte_p1_type == t_open_paren);
                            enum SyntaxElementType commitType = (isImage ? s_image : s_link);
                            PREPARE_RANGE_FROM_TOKEN(i_m1);
                            // an image element starts at the "!" rather than at the "["
                            NSRange fromRange = (isImage ? i_m1_range : i_range);
                            // we're good!
                            PREPARE_RANGE_FROM_TOKEN(j);
                            NSRange commitRange = BRIDGE_RANGES(fromRange, j_range);
                            NSMutableString *linkValue = [[NSMutableString alloc] init];
                            // "[text][]": the link text itself is used as the name
                            BOOL isEmptyLink = (linkTextEnd+2 == j && lte_p1_type == t_open_bracket && j_type == t_close_bracket);
                            int linkValueFrom = isEmptyLink ? i+1 : linkTextEnd+2;
                            int linkValueTo = isEmptyLink ? linkTextEnd-1 : j-1;
                            for (int k = linkValueFrom; k <= linkValueTo; k++) {
                                PREPARE_TOKEN(k, k);
                                [linkValue appendString:k_token[@"Text"]];
                            }
                            COMMIT_ELEMENT(commitRange, commitType, @{nameOrURLKey: [NSString stringWithString:linkValue]});
                            CONTINUE_AT(j);
                        } else if (j_type == t_newline) {
                            // invalid, fallback to text
                            break;
                        }
                    }
                }
            } break;

#pragma mark list items
            case t_bullet: {
                // enforce paragraph entrance, unless we're already inside a list
                if (i-1 != lastCommittedListItemToken) {
                    CHECK_ENTERS_AS_PARAGRAPH;
                }
                for (int j = i+1; j < count+1; j++) {
                    PREPARE_TOKEN(j, j);
                    // looking for newline or EOF
                    if (!(j_type == t_newline || j_type == 0)) continue;
                    PREPARE_TOKEN(j+1, j_p1);
                    // next token can be a paragraph end or another bullet
                    // Note: we don't know whether next bullet is valid at this point
                    if (j_p1_type == t_newline || j_p1_type == t_bullet || j_p1_type == 0) {
                        PREPARE_TO_RANGE_FROM_TOKEN_OR_PREVIOUS(j);
                        NSRange commitRange = BRIDGE_RANGES(i_range, toRange);
                        COMMIT_ELEMENT(commitRange, s_list_item, @{@"MakupTokenRanges": @[i_token[@"Range"]]});
                        lastCommittedListItemToken = j;
                        // Only the bullet itself is consumed: scanning resumes at the first
                        // token inside the item so inline styles that start right after the
                        // bullet are parsed, too.
                        CONTINUE_AT(i);
                    } else if (j_p1_type == t_indent) {
                        continue; // next line being an indent means it's part of the list item
                    } else if (j_p1_type == t_space) {
                        // if it's a space, we'll also allow a combination of X space, where X is the length of the list item bullet
                        BOOL valid = YES;
                        int k = j+2;
                        for (; k <= j+i_range.length; k++) {
                            PREPARE_TOKEN(k, k);
                            if (k_type == t_space) continue;
                            // else invalid
                            valid = NO;
                            break;
                        }
                        if (valid) {
                            j = k;
                            continue; // valid
                        } else {
                            break; // invalid
                        }
                    } else {
                        break; // invalid, discard entire item and move on
                    }
                }
            } break;

#pragma mark - default
            case t_text: default: {
                // do nothing
            } break;
        }
    next_syntax_element: continue;
    }
    return syntaxElements;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// | |
// MarkdownSyntaxHighlighter.m | |
// Macchiato | |
// | |
// Created by Kenneth Ballenegger on 7/24/12. | |
// Copyright (c) 2012 Azure Talon. All rights reserved. | |
// | |
#import "MarkdownSyntaxHighlighter.h" | |
#import "MacchiatoThemeController.h" | |
#import "markdown.lexer.h" | |
#import "MarkdownGrammar.h" | |
static NSString *const TokenTypeAttributeName = @"TokenTypeAttributeName"; | |
static NSString *const SyntaxElementTypeAttributeName = @"SyntaxElementTypeAttributeName"; | |
@interface MarkdownSyntaxHighlighter () <NSLayoutManagerDelegate> {
@private
    // text storage whose attributes this highlighter rewrites
    NSTextStorage *textStorage;
}
@end

@implementation MarkdownSyntaxHighlighter

/// Creates a highlighter that reads from and writes attributes to `storage`.
- (id)initWithTextStorage:(NSTextStorage *)storage {
    if (self = [super init]) {
        self->textStorage = storage;
    }
    return self;
}

/// Unsupported initializer: logs a hint and aborts the process.
- (id)init {
    NSLog(@"You should only be using initWithTextStorage:.");
    abort();
}

/// Lexes and parses the text in `range` and applies the resulting attributes.
/// Attribute changes are made inside a synchronous dispatch to the main queue.
/// @param range Range of the text storage to re-highlight, in text-storage coordinates.
- (void)highlightRange:(NSRange)range {
    if (range.location == 0 && range.length == 0)
        return; // don't highlight empty document it's buggzy

    NSString *fullText = self->textStorage.string;
    // a range reaching past the end of the text would make substringWithRange: raise
    if (range.location > fullText.length || range.length > fullText.length - range.location)
        return;

    NSMutableArray *tokens = [NSMutableArray array];
    NSString *substringToLex = [fullText substringWithRange:range];
    const char *cStringToLex = [substringToLex cStringUsingEncoding:NSUTF8StringEncoding];
    if (cStringToLex == NULL)
        return; // not representable as UTF-8, nothing the lexer could work with

    lex(cStringToLex, ^(enum TokenType ttype, const char *ttext, NSRange trange) {
        // stringWithUTF8String: returns nil for invalid UTF-8 and a dictionary literal
        // throws on nil values, so fall back to an empty string
        NSString *text = (ttext ? [NSString stringWithUTF8String:ttext] : nil) ?: @"";
        [tokens addObject:@{@"TokenType": [NSNumber numberWithInt:ttype],
                            @"Range": [NSValue valueWithRange:trange],
                            @"Text": text}];
    });
    NSArray *syntaxElements = MarkdownSyntaxElementsFromTokens(tokens);

    // this should be done on main thread
    // NOTE(review): dispatch_sync onto the main queue blocks forever when called from the
    // main thread — presumably callers always invoke this from a background queue; confirm.
    dispatch_sync(dispatch_get_main_queue(), ^{
        // reset the whole range to the default style first
        [self highlightSyntaxElement:@{@"Range": [NSValue valueWithRange:NSMakeRange(0, range.length)],
                                       @"SyntaxElementType": [NSNumber numberWithInt:s_default]}
                             inRange:range];
        // set attributes for syntax element tokens
        for (NSDictionary *syntaxElement in syntaxElements) {
            [self highlightSyntaxElement:syntaxElement inRange:range];
        }

        // debug mode: 1 tints every token, 2 tints every syntax element; the hue is
        // derived from the numeric type
        NSInteger debugMode = [[NSUserDefaults standardUserDefaults] integerForKey:@"MacchiatoDebugMode"];
        if (debugMode == 1) {
            for (NSDictionary *token in tokens) {
                float f = ((int)[token[@"TokenType"] intValue] - 10) / 20.0f;
                NSColor *color = [NSColor colorWithCalibratedHue:f saturation:1 brightness:1 alpha:.7];
                NSRange absoluteRange = [token[@"Range"] rangeValue];
                absoluteRange.location += range.location;
                [self->textStorage addAttribute:NSBackgroundColorAttributeName value:color range:absoluteRange];
            }
        } else if (debugMode == 2) {
            [self->textStorage addAttribute:NSBackgroundColorAttributeName
                                      value:[NSColor colorWithCalibratedHue:(s_default - 10) / 20.0f saturation:1 brightness:1 alpha:.7]
                                      range:range];
            for (NSDictionary *syntaxElement in syntaxElements) {
                float f = ((int)[syntaxElement[@"SyntaxElementType"] intValue] - 10) / 20.0f;
                NSColor *color = [NSColor colorWithCalibratedHue:f saturation:1 brightness:1 alpha:.7];
                NSRange absoluteRange = [syntaxElement[@"Range"] rangeValue];
                absoluteRange.location += range.location;
                [self->textStorage addAttribute:NSBackgroundColorAttributeName value:color range:absoluteRange];
            }
        }
    });
}

/// Applies the attributes for one syntax element to the text storage.
/// @param syntaxElement Dictionary with @"Range" (relative to `range`), @"SyntaxElementType"
///        and optionally @"MakupTokenRanges", @"Name" and @"URL".
/// @param range The range that was lexed; its location is added to every element range.
- (void)highlightSyntaxElement:(NSDictionary *)syntaxElement inRange:(NSRange)range {
    enum SyntaxElementType stype = ((NSNumber *)syntaxElement[@"SyntaxElementType"]).intValue;
    NSMutableDictionary *attrs = [NSMutableDictionary dictionary];
    MacchiatoThemeController *themeController = [MacchiatoThemeController sharedController];

    // use defaults
    attrs[NSFontAttributeName] = [themeController currentMainFont];
    attrs[NSForegroundColorAttributeName] = [themeController currentTextForegroundColor];

    // headline level N: bold main font at size 20 - N
#define CASE_HEADLINE_TYPE(index) case s_headline_##index: { attrs[NSFontAttributeName] = [[NSFontManager sharedFontManager] convertFont:[[NSFontManager sharedFontManager] convertFont:[themeController currentMainFont] toHaveTrait:NSFontBoldTrait] toSize:20.0f - index]; } break;

    switch (stype) {
        case s_emphasis: {
            attrs[NSFontAttributeName] = [[NSFontManager sharedFontManager] convertFont:[themeController currentMainFont] toHaveTrait:NSFontItalicTrait];
        } break;
        case s_strong: {
            attrs[NSFontAttributeName] = [[NSFontManager sharedFontManager] convertFont:[themeController currentMainFont] toHaveTrait:NSFontBoldTrait];
        } break;
        case s_strong_emphasis: {
            attrs[NSFontAttributeName] = [[NSFontManager sharedFontManager] convertFont:[[NSFontManager sharedFontManager] convertFont:[themeController currentMainFont] toHaveTrait:NSFontItalicTrait] toHaveTrait:NSFontBoldTrait];
        } break;
        case s_code:
        case s_code_block: {
            attrs[NSFontAttributeName] = [themeController currentMonospacedFont];
        } break;
        case s_blockquote: {
            attrs[NSFontAttributeName] = [themeController currentQuoteFont];
        } break;
        case s_link:
        case s_image:
        case s_link_info: {
            attrs[NSUnderlineStyleAttributeName] = [NSNumber numberWithInt:NSSingleUnderlineStyle];
            attrs[NSUnderlineColorAttributeName] = [themeController currentUnderlineColor];
            attrs[NSCursorAttributeName] = [NSCursor pointingHandCursor];
            if (stype == s_link) {
                if (syntaxElement[@"URL"]) {
                    attrs[@"MarkdownClickAction"] = @(MarkdownClickActionOpenURL);
                    attrs[@"URL"] = syntaxElement[@"URL"];
                } else if (syntaxElement[@"Name"]) {
                    attrs[@"MarkdownClickAction"] = @(MarkdownClickActionOpenNamedLink);
                    attrs[@"Name"] = syntaxElement[@"Name"];
                }
            } else {
                // NOTE(review): this branch also handles s_link_info, which therefore gets
                // the render-image click action — confirm that is intended.
                attrs[@"MarkdownClickAction"] = @(MarkdownClickActionRenderImage);
                attrs[@"URL"] = syntaxElement[@"URL"];
            }
        } break;
        case s_rule: {
            NSMutableParagraphStyle *paragraphStyle = [[themeController currentParagraphStyle] mutableCopy];
            [paragraphStyle setAlignment:NSCenterTextAlignment];
            attrs[NSParagraphStyleAttributeName] = paragraphStyle;
        } break;
        CASE_HEADLINE_TYPE(1);
        CASE_HEADLINE_TYPE(2);
        CASE_HEADLINE_TYPE(3);
        CASE_HEADLINE_TYPE(4);
        CASE_HEADLINE_TYPE(5);
        CASE_HEADLINE_TYPE(6);
        case s_default: default: {
            // do nothing
        } break;
    }

    // callback on named link info
    if (stype == s_link_info && self.namedLinkCallback) {
        self.namedLinkCallback(syntaxElement[@"Name"], syntaxElement[@"URL"]);
    }

    // element ranges are relative to the lexed substring; shift them into storage coordinates
    NSRange absoluteRange = [syntaxElement[@"Range"] rangeValue];
    absoluteRange.location += range.location;
    attrs[@"SyntexElementRange"] = [NSValue valueWithRange:absoluteRange];
    // setAttributes: replaces whatever attributes the range had before
    [self->textStorage setAttributes:attrs range:absoluteRange];

    // the markup characters themselves (e.g. "*", "#", ">") are drawn in the subdued colour
    for (NSValue *markupStatementRangeValue in syntaxElement[@"MakupTokenRanges"]) {
        NSRange markupStatementRange = markupStatementRangeValue.rangeValue;
        markupStatementRange.location += range.location;
        [self->textStorage addAttribute:NSForegroundColorAttributeName value:[themeController currentTextSubduedColor] range:markupStatementRange];
    }
}

@end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment