Skip to content

Instantly share code, notes, and snippets.

@kballenegger
Created October 26, 2012 01:10
Show Gist options
  • Star 8 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kballenegger/29dabe4b6e762ee221df to your computer and use it in GitHub Desktop.
Save kballenegger/29dabe4b6e762ee221df to your computer and use it in GitHub Desktop.
Macchiato Markdown Parsing Engine
/*** Definition section ***/
%{
/* C code to be copied verbatim */
#include <stdio.h>
#include <stdlib.h>
#include "markdown.lexer.h"
/* TOKEN(type): used inside rule actions to emit a non-text token. It switches
 * the scanner to the `neutral` start state, flushes any buffered text run via
 * concat_previous_text_token(), and then reports the current match
 * (yytext / yyleng) through record_token(). The macro expands to several
 * statements, so it is only safe inside a braced action. */
#define TOKEN(type) \
BEGIN(neutral); \
concat_previous_text_token(); \
record_token(yytext, yyleng, type)
/* Number of bytes the text-token buffer is (re)allocated / grown by. */
#define T_TEXT_BUFFER_SIZE_INCREMENT 1024
/* When 0, update_multibytes_encountered() returns 0 without counting, so
 * reported ranges are left unadjusted. */
#define MULTIBYTE_RANGE_ADJUSTING_ENABLED 1
/* Forward declarations for the helpers defined in the user-code section. */
void concat_previous_text_token(void);
void append_to_text_token_buffer(char *text, unsigned long len, enum TokenType type);
int update_multibytes_encountered(char *text, unsigned long len);
void record_token(char *text, unsigned long length, enum TokenType type);
// global state (reset by lex() at the start of every run)
// running offset of the next token; record_token() advances it by each token's byte length
NSUInteger token_index = 0;
// block invoked by record_token() for every token
TokenLexedBlock token_callback_block = nil;
// queue on which lex() runs the scanner; created once inside lex()
static dispatch_queue_t lexer_queue;
// queue lex() was called from, captured at the start of each run
static dispatch_queue_t calling_queue;
// running total maintained by update_multibytes_encountered(); subtracted from
// token_index when record_token() computes a token's range location
int multibytes_encountered = 0;
// concatenation of text tokens: consecutive single-character matches are
// accumulated here and emitted as one token by concat_previous_text_token()
char *last_text_token = NULL;            // heap buffer holding the pending text
int last_text_token_len = 0;             // bytes currently used in the buffer
int last_text_token_buffer_size = 0;     // bytes currently allocated for the buffer
enum TokenType last_text_token_token_type = t_text; // token type of the pending text
%}
/*** Regex definition section ***/
/* Named patterns referenced as {name} in the rules section. */
/* em: a single emphasis marker, either an asterisk or an underscore. */
em "*"|"_"
/* rule: one of the three-character runs ---, *** or ===. The ^ and $ anchors
 * are written on the first and last alternative; NOTE(review): confirm that
 * flex applies them to the whole alternation as intended. */
rule ^"---"|"***"|"==="$
/* symbol_bullet: up to three spaces, one bullet glyph, then a space or tab. */
symbol_bullet " "{0,3}("•"|"-"|"*"|"✓"|"✗")(" "|\t)
/* number_bullet: up to three spaces, digits followed by "." or ")", then a
 * space or tab. */
number_bullet " "{0,3}([0-9]+("."|")"))(" "|\t)
/* bullet: either kind of list marker. */
bullet {symbol_bullet}|{number_bullet}
/* indent: one tab or exactly four spaces. */
indent \t|" "{4}
/* punctuation: the single characters reported as t_punctuation. */
punctuation "."|","|"?"|"!"|"-"|"–"|"—"|"/"|"\\"|"'"|"\""|":"|";"|"("|")"|"["|"]"|"|"
/* This tells flex to read only one input file */
%option noyywrap
/* states (declared with %s, i.e. inclusive start conditions: rules without a
 * <state> prefix stay active in them). `neutral` is entered by the TOKEN
 * macro, `after_bullet` after a bullet token, `in_paren` / `in_bracket` after
 * an opening "(" / "[". */
%s neutral
%s after_bullet
%s in_paren
%s in_bracket
%%
/*** Rules section
 *
 * Every action either emits a token immediately through TOKEN(...) or, for
 * the single-character catch-all rules, appends the matched byte to the
 * pending text buffer with append_to_text_token_buffer(); buffered text is
 * emitted later, when the next TOKEN(...) (or the end of lex()) flushes it.
 *
 * - <in_paren> / <in_bracket> rules collect everything up to the closing
 *   ")" / "]" as t_paren_text / t_bracket_text.
 * - Rules anchored with ^ only match at the start of a line; rules anchored
 *   with $ only match at the end of a line.
 * - No rule in this section is scoped to <after_bullet>; the state is entered
 *   by the bullet rule and left again by the next TOKEN(...).
 * - Where two rules match the same length of input, the one listed first is
 *   used, so the order below is significant (e.g. ":" is reported as t_colon
 *   rather than t_punctuation).
 *
 * NOTE(review): flex expects comments in this section to be indented; confirm
 * the indentation of this header in the real .l file.
 ***/
<in_paren>")" { TOKEN(t_close_paren); }
<in_paren>. { append_to_text_token_buffer(yytext, yyleng, t_paren_text); BEGIN(in_paren); }
<in_bracket>"]" { TOKEN(t_close_bracket); }
<in_bracket>. { append_to_text_token_buffer(yytext, yyleng, t_bracket_text); BEGIN(in_bracket); }
{rule} { TOKEN(t_rule); }
^{bullet} { TOKEN(t_bullet); BEGIN(after_bullet); }
{em}{3} { TOKEN(t_strong_emphasis); }
{em}{2} { TOKEN(t_strong); }
{em} { TOKEN(t_emphasis); }
: { TOKEN(t_colon); }
``` { TOKEN(t_triple_backtick); }
` { TOKEN(t_backtick); }
^{indent} { TOKEN(t_indent); }
^">" { TOKEN(t_quotemark); }
"!" { TOKEN(t_bang); }
"(" { TOKEN(t_open_paren); BEGIN(in_paren); }
"[" { TOKEN(t_open_bracket); BEGIN(in_bracket); }
^"-"+$ { TOKEN(t_headline_underline); }
^"="+$ { TOKEN(t_headline_double_underline); }
^"#"{1,6} { TOKEN(t_headline_mark); }
"#"{1,6}$ { TOKEN(t_headline_mark); }
" \n" { TOKEN(t_hard_newline); }
"\n" { TOKEN(t_newline); }
" " { TOKEN(t_space); }
{punctuation} { TOKEN(t_punctuation); }
. { append_to_text_token_buffer(yytext, yyleng, t_text); }
%%
/*** C Code section ***/
/*
 * Appends `len` bytes of `text` to the pending text-token buffer so that runs
 * of single-character matches are later reported as one token.
 *
 * text: bytes to append (only the first `len` bytes are read).
 * len:  number of bytes to append.
 * type: token type of the run. If it differs from the type of the text
 *       currently buffered, the buffered text is flushed first via
 *       concat_previous_text_token().
 *
 * The buffer is kept NUL-terminated after every append. Aborts the process
 * if the buffer cannot be grown, since continuing would write through a NULL
 * pointer.
 */
void append_to_text_token_buffer(char *text, unsigned long len, enum TokenType type) {
    // if the type is different, commit the token
    if (type != last_text_token_token_type) concat_previous_text_token();

    // Make room for the new bytes plus the terminating NUL. Grow in whole
    // increments until everything fits, so an append longer than a single
    // increment cannot overrun the buffer.
    unsigned long needed = (unsigned long)last_text_token_len + len + 1;
    if (last_text_token == NULL || needed > (unsigned long)last_text_token_buffer_size) {
        unsigned long new_size = (unsigned long)last_text_token_buffer_size;
        while (new_size < needed) new_size += T_TEXT_BUFFER_SIZE_INCREMENT;
        if (new_size == 0) new_size = T_TEXT_BUFFER_SIZE_INCREMENT;

        // Keep the old pointer until realloc is known to have succeeded.
        char *grown = (char *)realloc(last_text_token, new_size);
        if (grown == NULL) {
            fprintf(stderr, "markdown lexer: out of memory growing text token buffer\n");
            abort();
        }
        last_text_token = grown;
        last_text_token_buffer_size = (int)new_size;
    }

    // Copy exactly `len` bytes; do not rely on text[len] being readable.
    for (unsigned long i = 0; i < len; i++) {
        last_text_token[(unsigned long)last_text_token_len + i] = text[i];
    }
    last_text_token_len += (int)len;
    // realloc'd memory is not zeroed, so terminate explicitly.
    last_text_token[last_text_token_len] = '\0';

    // ensure type
    last_text_token_token_type = type;
}
/*
 * Reports one token to the callback block and advances the running offset.
 *
 * text:   token bytes (passed through to the callback unchanged).
 * length: byte length of the token.
 * type:   token type passed through to the callback.
 *
 * The range handed to the callback starts at the byte offset of the token
 * minus the multibyte adjustment accumulated by *earlier* tokens, and its
 * length is the byte length minus the adjustment for this token's own bytes.
 * NOTE(review): the adjustment removes one unit per UTF-8 continuation byte;
 * presumably callers treat the range as NSString indices -- confirm the
 * behaviour for characters outside the BMP.
 */
void record_token(char *text, unsigned long length, enum TokenType type) {
    // The location must use the adjustment accumulated BEFORE this token.
    // update_multibytes_encountered() modifies multibytes_encountered, so the
    // two values are computed in separate statements instead of inside one
    // argument list, where the evaluation order is unspecified.
    NSUInteger location = token_index - multibytes_encountered;
    NSUInteger adjustment = (NSUInteger)update_multibytes_encountered(text, length);
    NSRange range = NSMakeRange(location, length - adjustment);

    // The thread that called lex() is blocked until the scan completes, so the
    // callback is invoked directly. Hopping back onto the caller's queue with
    // dispatch_sync deadlocks whenever that queue is serial (e.g. the main
    // queue), because it is busy waiting for this very scan.
    if (token_callback_block) {
        token_callback_block(type, text, range);
    }

    token_index += length;
}
/*
 * Counts the bytes in text[0..len) whose top two bits are 1 and 0 (UTF-8
 * continuation bytes), adds that count to the global multibytes_encountered
 * total and returns it. Returns 0 without touching the total when
 * MULTIBYTE_RANGE_ADJUSTING_ENABLED is 0.
 */
int update_multibytes_encountered(char *text, unsigned long len) {
    if (!MULTIBYTE_RANGE_ADJUSTING_ENABLED) return 0;

    int continuation_bytes = 0;
    for (int offset = 0; offset < len; offset++) {
        // high bit set and second-highest bit clear: part of a multibyte
        // sequence, but not its leading byte
        if ((text[offset] & 0xC0) == 0x80) {
            continuation_bytes++;
        }
    }

    multibytes_encountered += continuation_bytes;
    return continuation_bytes;
}
/*
 * Flushes the pending text run: if any text has been buffered, reports it as
 * a single token through record_token() and replaces the buffer with a fresh,
 * zeroed one of T_TEXT_BUFFER_SIZE_INCREMENT bytes, resetting the pending
 * type to t_text. Does nothing when the buffer is empty.
 */
void concat_previous_text_token(void) {
    if (last_text_token_len != 0) {
        record_token(last_text_token, last_text_token_len, last_text_token_token_type);

        // start over with an empty, zero-filled buffer
        free(last_text_token);
        last_text_token_buffer_size = T_TEXT_BUFFER_SIZE_INCREMENT;
        last_text_token = calloc(T_TEXT_BUFFER_SIZE_INCREMENT, 1);
        last_text_token_token_type = t_text;
        last_text_token_len = 0;
    }
}
/*
 * Tokenizes a NUL-terminated string and reports every token to `block`.
 *
 * data:  NUL-terminated input. A NULL pointer is ignored.
 * block: invoked once per token, in input order, before lex() returns.
 *        A nil block is ignored.
 *
 * The scanner keeps its state in globals, so all runs are serialized on a
 * private queue and lex() blocks until the scan has finished.
 */
void lex(const char *data, TokenLexedBlock block) {
    // Nothing to scan / nobody to report to; yy_scan_string(NULL) and calling
    // a nil block would both crash.
    if (data == NULL || block == nil) return;

    // create global queue
    static dispatch_once_t lexer_queue_creation;
    dispatch_once(&lexer_queue_creation, ^{
        lexer_queue = dispatch_queue_create("com.azuretalon.Macchiato.lexer_queue", NULL);
    });

    // NOTE: dispatch_get_current_queue() is deprecated; it is kept only so the
    // calling_queue global continues to be populated for each run.
    dispatch_queue_t current_calling_queue = dispatch_get_current_queue();

    dispatch_sync(lexer_queue, ^{
        // reset global state
        token_callback_block = block;
        token_index = 0;
        multibytes_encountered = 0;
        calling_queue = current_calling_queue; // capture from scope

        free(last_text_token); // normally NULL here; free(NULL) is a no-op
        last_text_token = calloc(T_TEXT_BUFFER_SIZE_INCREMENT, 1); // allocate a kilobyte, zero it out
        last_text_token_len = 0;
        last_text_token_token_type = t_text;
        last_text_token_buffer_size = T_TEXT_BUFFER_SIZE_INCREMENT;
        if (last_text_token == NULL) {
            // out of memory: leave the globals in their idle state
            last_text_token_buffer_size = 0;
            token_callback_block = nil;
            return;
        }

        // The start condition is a scanner global that survives between
        // runs. Without this reset, input that ended inside "(" or "[" would
        // make the next run begin in the in_paren / in_bracket state.
        BEGIN(INITIAL);

        // lex
        YY_BUFFER_STATE buf = yy_scan_string(data);
        yylex();
        concat_previous_text_token(); // grab any remaining text
        yy_delete_buffer(buf);

        // free
        free(last_text_token);
        last_text_token = NULL;
        last_text_token_len = 0;
        last_text_token_buffer_size = 0;

        // do not keep the caller's block (and whatever it captured) alive
        token_callback_block = nil;
    });
}
//
// MarkdownGrammar.m
// Macchiato
//
// Created by Kenneth Ballenegger on 8/2/12.
// Copyright (c) 2012 Azure Talon. All rights reserved.
//
#import "MarkdownGrammar.h"
#import "markdown.lexer.h"
#define DEBUGGING_GRAMMAR 0
// TODO: comment with example input and output
/*
 * Groups a flat list of lexer tokens into Markdown syntax elements.
 *
 * tokens: array of dictionaries; this function reads the keys @"TokenType"
 *         (NSNumber), @"Range" (NSValue wrapping an NSRange) and @"Text"
 *         (NSString) from each entry.
 *
 * Returns an array of mutable dictionaries, one per recognised element, each
 * with @"Range" (NSValue) and @"SyntaxElementType" (NSNumber), plus, depending
 * on the element, @"MakupTokenRanges" (ranges of the markup characters
 * themselves), @"Name" and/or @"URL".
 *
 * Conventions used throughout:
 *  - A token index outside the array yields a nil token with type 0, which the
 *    code treats as "start / end of input".
 *  - CONTINUE_AT(x) resumes the outer loop at the token AFTER x.
 *  - A `break` out of a case means "not a valid element, treat as text".
 */
NSArray * MarkdownSyntaxElementsFromTokens(NSArray *tokens) {
    NSMutableArray *syntaxElements = [NSMutableArray array];
    NSUInteger count = [tokens count];

    /* Declares <var>_token and <var>_type for the token at `index`;
     * out-of-range indexes give nil / 0. */
#define PREPARE_TOKEN(index, var) \
    NSDictionary *var##_token; enum TokenType var##_type; \
    if ((index) >= 0 && (index) < count) { \
        var##_token = [tokens objectAtIndex:(index)]; \
        var##_type = [var##_token[@"TokenType"] intValue]; \
    } else { \
        var##_token = nil; \
        var##_type = 0; \
    } // END OF MACRO

    /* Declares <var>_range from <var>_token. A missing token gives
     * {NSNotFound, 0} instead of an uninitialised range. */
#define PREPARE_RANGE_FROM_TOKEN(var) \
    NSRange var##_range = NSMakeRange(NSNotFound, 0); \
    if (var##_token) \
        var##_range = [var##_token[@"Range"] rangeValue]; // END OF MACRO

    /* Declares `toRange`: the range of token <var>, or of the token before it
     * when <var> is past the end of input. */
#define PREPARE_TO_RANGE_FROM_TOKEN_OR_PREVIOUS(var) \
    NSRange toRange; \
    if (var##_type) { \
        toRange = [var##_token[@"Range"] rangeValue]; \
    } else { \
        PREPARE_TOKEN(var-1, toRange_##var##_m1); \
        toRange = [toRange_##var##_m1_token[@"Range"] rangeValue]; \
    } // END OF MACRO

    /* Range starting at `first` and ending where `second` ends. */
#define BRIDGE_RANGES(first, second) \
    NSMakeRange(first.location, \
                second.location + second.length - first.location) // END OF MACRO

    /* Marks token j as the last one consumed; the loop's i++ then moves on to
     * the token after it. */
#define CONTINUE_AT(j) \
    i = j; \
    goto next_syntax_element; // END OF MACRO

    /* Appends a syntax element. `extra` is evaluated once. */
#define COMMIT_ELEMENT(range, type, extra) \
    { /* guard scope for var declaration */ \
        NSDictionary *commitExtra_ = (extra); \
        NSMutableDictionary *metaDict = \
            [NSMutableDictionary dictionaryWithDictionary: \
                @{@"Range": [NSValue valueWithRange:range], \
                  @"SyntaxElementType": [NSNumber numberWithInt:type]}]; \
        if (commitExtra_) [metaDict addEntriesFromDictionary:commitExtra_]; \
        [syntaxElements addObject:metaDict]; \
        if (DEBUGGING_GRAMMAR) NSLog(@"found token type %d", type); \
    } // END OF MACRO

    /* Inline pair such as `code` or *emphasis*: the opening marker at i is
     * matched with the next identical marker on the same line that is followed
     * by a space, newline, punctuation or the end of input. */
#define CASE_SIMPLE_PAIR_FOR_TYPE(t_type, s_type) \
    case t_type: { \
        for (int j = i+1; j < count; j++) { \
            PREPARE_TOKEN(j, j); \
            if (j_type == t_type) { \
                /* next must be space, newline, punctuation or end of input */ \
                PREPARE_TOKEN(j+1, j_p1); \
                if (!(j_p1_token == nil || \
                      j_p1_type == t_space || \
                      j_p1_type == t_newline || \
                      j_p1_type == t_punctuation)) continue; \
                PREPARE_RANGE_FROM_TOKEN(j); \
                NSRange commitRange = BRIDGE_RANGES(i_range, j_range); \
                NSDictionary *metaDictionary = \
                    @{@"MakupTokenRanges": @[ \
                        [NSValue valueWithRange:i_range], \
                        [NSValue valueWithRange:j_range]]}; \
                COMMIT_ELEMENT(commitRange, s_type, metaDictionary); \
                CONTINUE_AT(j); \
            } else if (j_type == t_newline) { \
                /* invalid, fallback to text */ \
                break; \
            } \
        } \
    } break; // END OF MACRO

    /* Leaves the current case unless the two tokens before i are each a
     * newline or the start of input. */
#define CHECK_ENTERS_AS_PARAGRAPH \
    PREPARE_TOKEN(i-1, i_m1); \
    PREPARE_TOKEN(i-2, i_m2); \
    if (!((i_m1_type == t_newline || i_m1_type == 0) && \
          (i_m2_type == t_newline || i_m2_type == 0))) break; // END OF MACRO

#pragma mark -

    // LOOP STARTS HERE

    // for list items, we need to keep track of the last token that was part of a list item
    int lastCommittedListItemToken = -1;

    for (int i = 0; i < count; i++) {
        PREPARE_TOKEN(i, i);
        PREPARE_RANGE_FROM_TOKEN(i);
        if (DEBUGGING_GRAMMAR) NSLog(@"big loop iteration %d", i);

        // figure out which opening token it is
        switch (i_type) {

#pragma mark inline code
            CASE_SIMPLE_PAIR_FOR_TYPE(t_backtick, s_code);

#pragma mark emphasis
            CASE_SIMPLE_PAIR_FOR_TYPE(t_emphasis, s_emphasis);

#pragma mark strong
            CASE_SIMPLE_PAIR_FOR_TYPE(t_strong, s_strong);

#pragma mark strong emphasis
            CASE_SIMPLE_PAIR_FOR_TYPE(t_strong_emphasis, s_strong_emphasis);

#pragma mark code block
            // code block
            case t_indent: {
                // a code block must be:
                // - preceded by two newlines (or nothingness)
                // - followed by two newline (or nothingness)
                CHECK_ENTERS_AS_PARAGRAPH;
                // we go to count+1 so we can catch a code block at the very end of the string
                for (int j = i+1; j < count+1; j++) {
                    PREPARE_TOKEN(j, j);
                    // looking for the newline, still part of code block
                    if (!(j_type == t_newline || j_type == 0)) continue;
                    // look ahead to check if next token is also newline
                    PREPARE_TOKEN(j+1, j_p1);
                    if (j_p1_type == t_newline || j_p1_type == 0) {
                        PREPARE_TO_RANGE_FROM_TOKEN_OR_PREVIOUS(j);
                        NSRange commitRange = BRIDGE_RANGES(i_range, toRange);
                        COMMIT_ELEMENT(commitRange, s_code_block, nil);
                        CONTINUE_AT(j);
                    } else if (j_p1_type != t_indent) {
                        break; // a newline followed by something that isn't an indent means invalid code block
                    }
                }
            } break;

#pragma mark blockquote
            case t_quotemark: {
                // a quotation block must be:
                // - preceded by two newlines (or nothingness)
                // - followed by two newline (or nothingness)
                CHECK_ENTERS_AS_PARAGRAPH;
                NSMutableArray *markupTokenRanges = [NSMutableArray arrayWithObject:i_token[@"Range"]];
                // we go to count+1 so we can catch a blockquote at the very end of the string
                for (int j = i+1; j < count+1; j++) {
                    PREPARE_TOKEN(j, j);
                    if (j_type == t_quotemark) [markupTokenRanges addObject:j_token[@"Range"]];
                    // looking for the newline, still part of blockquote
                    if (!(j_type == t_newline || j_type == 0)) continue;
                    // look ahead to check if next token is also newline
                    PREPARE_TOKEN(j+1, j_p1);
                    if (j_p1_type == t_newline || j_p1_type == 0) {
                        PREPARE_TO_RANGE_FROM_TOKEN_OR_PREVIOUS(j);
                        NSRange commitRange = BRIDGE_RANGES(i_range, toRange);
                        COMMIT_ELEMENT(commitRange, s_blockquote, @{@"MakupTokenRanges": markupTokenRanges});
                        CONTINUE_AT(j);
                    } else if (j_p1_type != t_quotemark) {
                        break; // a newline followed by something that isn't a quotemark means invalid blockquote
                    }
                }
            } break;

#pragma mark headlines
            case t_headline_mark: {
                // a regular headline must be:
                // - preceded by two newlines (or nothingness)
                // - followed by two newline (or nothingness)
                NSMutableArray *markupTokenRanges = [NSMutableArray arrayWithObject:i_token[@"Range"]];
                CHECK_ENTERS_AS_PARAGRAPH;
                // we go to count+1 so we can catch a headline at the very end of the string
                for (int j = i+1; j < count+1; j++) {
                    PREPARE_TOKEN(j, j);
                    // looking for the newline, still part of the headline
                    if (!(j_type == t_newline || j_type == 0)) continue;
                    // look ahead to check if next token is also newline
                    PREPARE_TOKEN(j+1, j_p1);
                    if ((j_p1_type == t_newline || j_p1_type == 0)) {
                        // a trailing run of # directly before the newline is markup, too
                        PREPARE_TOKEN(j-1, j_m1);
                        if (j_m1_type == t_headline_mark)
                            [markupTokenRanges addObject:j_m1_token[@"Range"]];
                        PREPARE_TO_RANGE_FROM_TOKEN_OR_PREVIOUS(j);
                        NSRange commitRange = BRIDGE_RANGES(i_range, toRange);
                        // headline level = number of characters in the opening mark, clamped to 1...6
                        enum SyntaxElementType commitType = (enum SyntaxElementType)((int)s_headline_1 + (MAX(1, MIN(6, [i_token[@"Text"] length])) - 1));
                        COMMIT_ELEMENT(commitRange, commitType, @{@"MakupTokenRanges": markupTokenRanges});
                        CONTINUE_AT(j);
                    } else {
                        break; // this means a multi-line headline, which is not allowed
                    }
                }
            } break;

#pragma mark github code block
            case t_triple_backtick: {
                // a github-style code block is a paragraph
                CHECK_ENTERS_AS_PARAGRAPH;
                for (int j = i+1; j < count; j++) {
                    PREPARE_TOKEN(j, j);
                    // iterate until we find the matching triple backtick
                    if (j_type != t_triple_backtick) continue;
                    // look ahead to check if next two tokens are newlines
                    PREPARE_TOKEN(j+1, j_p1);
                    PREPARE_TOKEN(j+2, j_p2);
                    if ((j_p1_type == t_newline || j_p1_type == 0) &&
                        (j_p2_type == t_newline || j_p2_type == 0)) {
                        // the range macro needs an index variable named like the token prefix
                        int j_p1 = j+1;
                        PREPARE_TO_RANGE_FROM_TOKEN_OR_PREVIOUS(j_p1);
                        NSRange commitRange = BRIDGE_RANGES(i_range, toRange);
                        NSDictionary *metaDictionary = @{@"MakupTokenRanges": @[i_token[@"Range"], j_token[@"Range"]]};
                        COMMIT_ELEMENT(commitRange, s_code_block, metaDictionary);
                        CONTINUE_AT(j+1);
                    }
                }
            } break;

#pragma mark underlined headline
            case t_headline_underline: case t_headline_double_underline: {
                // previous token MUST be newline
                PREPARE_TOKEN(i-1, i_m1);
                if (i_m1_type != t_newline) break;
                // if no preceding newline found, assume zero -- this deals with headline at start of file
                int startIndex = 0;
                // loop backwards to find the actual headline
                for (int j = i-2; j >= 0; j--) {
                    PREPARE_TOKEN(j, j);
                    // i'm looking for a preceding newline to find beginning of previous line
                    if (j_type == t_newline) {
                        startIndex = j+1;
                        break;
                    }
                }
                // verify that it starts as paragraph, or on bounds
                PREPARE_TOKEN(startIndex-1, s_m1);
                if (!(s_m1_type == t_newline || s_m1_type == 0)) break;
                PREPARE_TOKEN(startIndex-2, s_m2);
                if (!(s_m2_type == t_newline || s_m2_type == 0)) break;
                // verify that it's followed by paragraph end
                PREPARE_TOKEN(i+1, i_p1);
                if (!(i_p1_type == t_newline || i_p1_type == 0)) break;
                PREPARE_TOKEN(i+2, i_p2);
                if (!(i_p2_type == t_newline || i_p2_type == 0)) break;
                // if we got to here without breaking, we're a valid match
                PREPARE_TOKEN(startIndex, start);
                PREPARE_RANGE_FROM_TOKEN(i_p1);
                // when the underline is the last token there is no trailing
                // newline to extend to, so end at the underline itself
                NSRange end_range = (i_p1_token ? i_p1_range : i_range);
                NSRange start_range = [start_token[@"Range"] rangeValue];
                NSRange commitRange = BRIDGE_RANGES(start_range, end_range);
                enum SyntaxElementType commitType;
                if (i_type == t_headline_double_underline)
                    commitType = s_headline_1;
                else
                    commitType = s_headline_2;
                COMMIT_ELEMENT(commitRange, commitType, @{@"MakupTokenRanges": @[i_token[@"Range"]]});
                CONTINUE_AT(i+1); // eat newline
            } break;

#pragma mark rule
            case t_rule: {
                CHECK_ENTERS_AS_PARAGRAPH;
                PREPARE_TOKEN(i+1, i_p1);
                PREPARE_TOKEN(i+2, i_p2);
                if ((i_p1_type == t_newline || i_p1_type == 0) &&
                    (i_p2_type == t_newline || i_p2_type == 0)) {
                    COMMIT_ELEMENT(i_range, s_rule, nil);
                    CONTINUE_AT(i+1);
                }
            } break;

#pragma mark links & images
            case t_open_bracket: {
                // seek end of link text
                int linkTextEnd = -1;
                for (int j = i+1; j < count; j++) {
                    PREPARE_TOKEN(j, j);
                    if (j_type == t_close_bracket) {
                        // must be followed by [, (, or :, else invalid
                        PREPARE_TOKEN(j+1, j_p1);
                        if (!(j_p1_type == t_open_bracket ||
                              j_p1_type == t_open_paren ||
                              j_p1_type == t_colon)) break;
                        // found!
                        linkTextEnd = j; break;
                    } else if (j_type == t_newline) {
                        // invalid, fallback to text
                        break;
                    }
                }
                if (linkTextEnd < 0) break; // end not found, this is not a link

                // now that we have end of link text, find rest of link
                PREPARE_TOKEN(linkTextEnd+1, lte_p1); // prepare "link target open" token, [ or (
                // start iterating after the lto
                if (lte_p1_type == t_colon) {
                    // we've got a link definition
                    // now we've gotta check how the grammar element started, definitions need to be one per line
                    PREPARE_TOKEN(i-1, i_m1);
                    // also, t_colon needs to be followed by a space
                    PREPARE_TOKEN(linkTextEnd+2, lte_p2);
                    if (!(i_m1_type == t_newline || i_m1_type == 0) ||
                        lte_p2_type != t_space) {
                        // fail
                        CONTINUE_AT(i+1);
                    }
                    // we go to count+1 so a definition on the last line, without
                    // a trailing newline, is still recognised
                    for (int j = linkTextEnd+3; j < count+1; j++) {
                        PREPARE_TOKEN(j, j);
                        // looking for newline or end of file
                        if (j_type == t_newline || j_type == 0) {
                            // we're good!
                            PREPARE_TO_RANGE_FROM_TOKEN_OR_PREVIOUS(j);
                            NSRange commitRange = BRIDGE_RANGES(i_range, toRange);
                            // the name is the text between the brackets
                            NSMutableString *name = [[NSMutableString alloc] init];
                            for (int k = i+1; k <= linkTextEnd-1; k++) {
                                PREPARE_TOKEN(k, k);
                                [name appendString:k_token[@"Text"]];
                            }
                            // the url is everything after ": " up to the end of the line
                            NSMutableString *url = [[NSMutableString alloc] init];
                            for (int k = linkTextEnd+3; k <= j-1; k++) {
                                PREPARE_TOKEN(k, k);
                                [url appendString:k_token[@"Text"]];
                            }
                            NSDictionary *metaDictionary = @{
                                @"Name": [NSString stringWithString:name],
                                @"URL": [NSString stringWithString:url]
                            };
                            COMMIT_ELEMENT(commitRange, s_link_info, metaDictionary);
                            CONTINUE_AT(j);
                        }
                    }
                } else if (lte_p1_type == t_open_bracket || lte_p1_type == t_open_paren) {
                    // we've got an inline link
                    for (int j = linkTextEnd+2; j < count; j++) {
                        PREPARE_TOKEN(j, j);
                        if ((lte_p1_type == t_open_bracket && j_type == t_close_bracket) ||
                            (lte_p1_type == t_open_paren && j_type == t_close_paren)) {
                            // found the closing ] or )
                            // [text][name] refers to a named link, [text](url) carries the url inline
                            NSString *nameOrURLKey = (lte_p1_type == t_open_bracket ? @"Name" : @"URL");
                            // actually, it may be an image if it's preceded by a bang!
                            PREPARE_TOKEN(i-1, i_m1);
                            BOOL isImage = (i_m1_type == t_bang && lte_p1_type == t_open_paren);
                            enum SyntaxElementType commitType = (isImage ? s_image : s_link);
                            PREPARE_RANGE_FROM_TOKEN(i_m1);
                            // an image element starts at the bang
                            NSRange fromRange = (isImage ? i_m1_range : i_range);
                            // we're good!
                            PREPARE_RANGE_FROM_TOKEN(j);
                            NSRange commitRange = BRIDGE_RANGES(fromRange, j_range);
                            NSMutableString *linkValue = [[NSMutableString alloc] init];
                            // [text][] uses the link text itself as the name
                            BOOL isEmptyLink = (linkTextEnd+2 == j && lte_p1_type == t_open_bracket && j_type == t_close_bracket);
                            int linkValueFrom = isEmptyLink ? i+1 : linkTextEnd+2;
                            int linkValueTo = isEmptyLink ? linkTextEnd-1 : j-1;
                            for (int k = linkValueFrom; k <= linkValueTo; k++) {
                                PREPARE_TOKEN(k, k);
                                [linkValue appendString:k_token[@"Text"]];
                            }
                            COMMIT_ELEMENT(commitRange, commitType, @{nameOrURLKey: [NSString stringWithString:linkValue]});
                            CONTINUE_AT(j);
                        } else if (j_type == t_newline) {
                            // invalid, fallback to text
                            break;
                        }
                    }
                }
            } break;

#pragma mark list items
            case t_bullet: {
                // enforce paragraph entrance, unless we're already inside a list
                if (i-1 != lastCommittedListItemToken) {
                    CHECK_ENTERS_AS_PARAGRAPH;
                }
                for (int j = i+1; j < count+1; j++) {
                    PREPARE_TOKEN(j, j);
                    // looking for newline or EOF
                    if (!(j_type == t_newline || j_type == 0)) continue;
                    PREPARE_TOKEN(j+1, j_p1);
                    // next token can be a paragraph end or another bullet
                    // Note: we don't know whether next bullet is valid at this point
                    if (j_p1_type == t_newline || j_p1_type == t_bullet || j_p1_type == 0) {
                        PREPARE_TO_RANGE_FROM_TOKEN_OR_PREVIOUS(j);
                        NSRange commitRange = BRIDGE_RANGES(i_range, toRange);
                        COMMIT_ELEMENT(commitRange, s_list_item, @{@"MakupTokenRanges": @[i_token[@"Range"]]});
                        lastCommittedListItemToken = j;
                        // Only the bullet itself is consumed: the loop resumes at
                        // the first token of the item, so inline styles that
                        // start right after the bullet are parsed as well.
                        CONTINUE_AT(i);
                    } else if (j_p1_type == t_indent) {
                        continue; // next line being an indent means it's part of the list item
                    } else if (j_p1_type == t_space) {
                        // if it's a space, we'll also allow a combination of X space, where X is the length of the list item bullet
                        BOOL valid = YES;
                        int k = j+2;
                        for (; k <= j+i_range.length; k++) {
                            PREPARE_TOKEN(k, k);
                            if (k_type == t_space) continue;
                            // else invalid
                            valid = NO;
                            break;
                        }
                        if (valid) {
                            j = k;
                            continue; // valid
                        } else {
                            break; // invalid
                        }
                    } else {
                        break; // invalid, discard entire item and move on
                    }
                }
            } break;

#pragma mark - default
            case t_text: default: {
                // do nothing
            } break;
        }
        next_syntax_element: continue;
    }
    return syntaxElements;
}
//
// MarkdownSyntaxHighlighter.m
// Macchiato
//
// Created by Kenneth Ballenegger on 7/24/12.
// Copyright (c) 2012 Azure Talon. All rights reserved.
//
#import "MarkdownSyntaxHighlighter.h"
#import "MacchiatoThemeController.h"
#import "markdown.lexer.h"
#import "MarkdownGrammar.h"
static NSString *const TokenTypeAttributeName = @"TokenTypeAttributeName";
static NSString *const SyntaxElementTypeAttributeName = @"SyntaxElementTypeAttributeName";
// YES when `range` lies completely inside a string of `length` characters.
// Written so that location + length cannot overflow.
static BOOL MarkdownRangeFitsLength(NSRange range, NSUInteger length) {
    return range.location <= length && range.length <= length - range.location;
}

@interface MarkdownSyntaxHighlighter () <NSLayoutManagerDelegate> {
@private
    // text storage whose attributes this highlighter rewrites
    NSTextStorage *textStorage;
}
@end

@implementation MarkdownSyntaxHighlighter

// Designated initializer: remembers the text storage to highlight.
- (id)initWithTextStorage:(NSTextStorage *)storage {
    if (self = [super init]) {
        self->textStorage = storage;
    }
    return self;
}

// Not supported: a highlighter is useless without a text storage, so misuse
// terminates the process.
- (id)init {
    NSLog(@"You should only be using initWithTextStorage:.");
    abort();
}

// Lexes the text in `range`, derives the syntax elements and applies the
// matching attributes to the text storage. Lexing and grammar analysis run on
// the calling thread; the attributes are applied on the main thread, and this
// method does not return before that has happened.
- (void)highlightRange:(NSRange)range {
    if (range.location == 0 && range.length == 0)
        return; // don't highlight empty document, it's buggy

    // The storage may have been edited since `range` was computed; a range
    // that no longer fits would make substringWithRange: raise.
    NSString *fullText = self->textStorage.string;
    if (!MarkdownRangeFitsLength(range, fullText.length))
        return;

    NSMutableArray *tokens = [NSMutableArray array];
    NSString *substringToLex = [fullText substringWithRange:range];
    const char *bytesToLex = [substringToLex cStringUsingEncoding:NSUTF8StringEncoding];
    if (bytesToLex == NULL)
        return; // text could not be converted; nothing to lex

    lex(bytesToLex, ^(enum TokenType ttype, const char *ttext, NSRange trange) {
        // stringWithUTF8String: returns nil for malformed bytes, and nil must
        // not end up in a dictionary literal
        NSString *text = (ttext ? [NSString stringWithUTF8String:ttext] : nil);
        if (text == nil) text = @"";
        [tokens addObject:@{@"TokenType": [NSNumber numberWithInt:ttype],
                            @"Range": [NSValue valueWithRange:trange],
                            @"Text": text}];
    });

    NSArray *syntaxElements = MarkdownSyntaxElementsFromTokens(tokens);

    dispatch_block_t applyAttributes = ^{
        NSUInteger storageLength = self->textStorage.length;

        // reset the whole range to the default style first
        [self highlightSyntaxElement:@{@"Range": [NSValue valueWithRange:NSMakeRange(0, range.length)],
                                       @"SyntaxElementType": [NSNumber numberWithInt:s_default]}
                             inRange:range];

        // set attributes for syntax element tokens
        for (NSDictionary *syntaxElement in syntaxElements) {
            [self highlightSyntaxElement:syntaxElement inRange:range];
        }

        // debug mode: 1 tints every token, 2 tints every syntax element
        NSInteger debugMode = [[NSUserDefaults standardUserDefaults] integerForKey:@"MacchiatoDebugMode"];
        if (debugMode == 1) {
            for (NSDictionary *token in tokens) {
                float f = ((int)[token[@"TokenType"] intValue] - 10) / 20.0f;
                NSColor *color = [NSColor colorWithCalibratedHue:f saturation:1 brightness:1 alpha:.7];
                NSRange absoluteRange = [token[@"Range"] rangeValue];
                absoluteRange.location += range.location;
                if (!MarkdownRangeFitsLength(absoluteRange, storageLength)) continue;
                [self->textStorage addAttribute:NSBackgroundColorAttributeName value:color range:absoluteRange];
            }
        } else if (debugMode == 2) {
            if (MarkdownRangeFitsLength(range, storageLength)) {
                [self->textStorage addAttribute:NSBackgroundColorAttributeName
                                          value:[NSColor colorWithCalibratedHue:(s_default - 10) / 20.0f saturation:1 brightness:1 alpha:.7]
                                          range:range];
            }
            for (NSDictionary *syntaxElement in syntaxElements) {
                float f = ((int)[syntaxElement[@"SyntaxElementType"] intValue] - 10) / 20.0f;
                NSColor *color = [NSColor colorWithCalibratedHue:f saturation:1 brightness:1 alpha:.7];
                NSRange absoluteRange = [syntaxElement[@"Range"] rangeValue];
                absoluteRange.location += range.location;
                if (!MarkdownRangeFitsLength(absoluteRange, storageLength)) continue;
                [self->textStorage addAttribute:NSBackgroundColorAttributeName value:color range:absoluteRange];
            }
        }
    };

    // Attributes are applied on the main thread. When this code is already
    // running there, the block is run inline: dispatch_sync onto the queue
    // that is currently executing never returns.
    if ([NSThread isMainThread]) {
        applyAttributes();
    } else {
        dispatch_sync(dispatch_get_main_queue(), applyAttributes);
    }
}

// Applies the attributes for one syntax element.
//
// syntaxElement: dictionary with @"Range" (relative to `range`),
//                @"SyntaxElementType" and optionally @"MakupTokenRanges",
//                @"Name" and @"URL".
// range:         the range that was lexed; its location is added to every
//                element range to obtain text-storage coordinates.
//
// Ranges that do not fit inside the text storage are skipped instead of
// raising an exception.
- (void)highlightSyntaxElement:(NSDictionary *)syntaxElement inRange:(NSRange)range {
    enum SyntaxElementType stype = ((NSNumber *)syntaxElement[@"SyntaxElementType"]).intValue;
    NSMutableDictionary *attrs = [NSMutableDictionary dictionary];
    MacchiatoThemeController *themeController = [MacchiatoThemeController sharedController];

    // use defaults
    attrs[NSFontAttributeName] = [themeController currentMainFont];
    attrs[NSForegroundColorAttributeName] = [themeController currentTextForegroundColor];

    // headline level N: bold main font at size 20 - N
#define CASE_HEADLINE_TYPE(index) case s_headline_##index: { attrs[NSFontAttributeName] = [[NSFontManager sharedFontManager] convertFont:[[NSFontManager sharedFontManager] convertFont:[themeController currentMainFont] toHaveTrait:NSFontBoldTrait] toSize:20.0f - index]; } break;

    switch (stype) {
        case s_emphasis: {
            attrs[NSFontAttributeName] = [[NSFontManager sharedFontManager] convertFont:[themeController currentMainFont] toHaveTrait:NSFontItalicTrait];
        } break;
        case s_strong: {
            attrs[NSFontAttributeName] = [[NSFontManager sharedFontManager] convertFont:[themeController currentMainFont] toHaveTrait:NSFontBoldTrait];
        } break;
        case s_strong_emphasis: {
            attrs[NSFontAttributeName] = [[NSFontManager sharedFontManager] convertFont:[[NSFontManager sharedFontManager] convertFont:[themeController currentMainFont] toHaveTrait:NSFontItalicTrait] toHaveTrait:NSFontBoldTrait];
        } break;
        case s_code:
        case s_code_block: {
            attrs[NSFontAttributeName] = [themeController currentMonospacedFont];
        } break;
        case s_blockquote: {
            attrs[NSFontAttributeName] = [themeController currentQuoteFont];
        } break;
        case s_link:
        case s_image:
        case s_link_info: {
            attrs[NSUnderlineStyleAttributeName] = [NSNumber numberWithInt:NSSingleUnderlineStyle];
            attrs[NSUnderlineColorAttributeName] = [themeController currentUnderlineColor];
            attrs[NSCursorAttributeName] = [NSCursor pointingHandCursor];
            if (stype == s_link) {
                if (syntaxElement[@"URL"]) {
                    attrs[@"MarkdownClickAction"] = @(MarkdownClickActionOpenURL);
                    attrs[@"URL"] = syntaxElement[@"URL"];
                } else if (syntaxElement[@"Name"]) {
                    attrs[@"MarkdownClickAction"] = @(MarkdownClickActionOpenNamedLink);
                    attrs[@"Name"] = syntaxElement[@"Name"];
                }
            } else {
                // NOTE(review): this branch also handles s_link_info, which
                // therefore gets the render-image click action -- confirm
                // that this is intended.
                attrs[@"MarkdownClickAction"] = @(MarkdownClickActionRenderImage);
                attrs[@"URL"] = syntaxElement[@"URL"];
            }
        } break;
        case s_rule: {
            NSMutableParagraphStyle *paragraphStyle = [[themeController currentParagraphStyle] mutableCopy];
            [paragraphStyle setAlignment:NSCenterTextAlignment];
            attrs[NSParagraphStyleAttributeName] = paragraphStyle;
        } break;
        CASE_HEADLINE_TYPE(1);
        CASE_HEADLINE_TYPE(2);
        CASE_HEADLINE_TYPE(3);
        CASE_HEADLINE_TYPE(4);
        CASE_HEADLINE_TYPE(5);
        CASE_HEADLINE_TYPE(6);
        case s_default: default: {
            // do nothing
        } break;
    }

    // callback on named link info
    if (stype == s_link_info && self.namedLinkCallback) {
        self.namedLinkCallback(syntaxElement[@"Name"], syntaxElement[@"URL"]);
    }

    NSUInteger storageLength = self->textStorage.length;

    NSRange absoluteRange = [syntaxElement[@"Range"] rangeValue];
    absoluteRange.location += range.location;
    if (!MarkdownRangeFitsLength(absoluteRange, storageLength))
        return; // stale or malformed element; applying it would raise

    attrs[@"SyntexElementRange"] = [NSValue valueWithRange:absoluteRange];
    // replaces (not merges) whatever attributes the range had before
    [self->textStorage setAttributes:attrs range:absoluteRange];

    // the markup characters themselves are drawn in the subdued colour
    for (NSValue *markupStatementRangeValue in syntaxElement[@"MakupTokenRanges"]) {
        NSRange markupStatementRange = markupStatementRangeValue.rangeValue;
        markupStatementRange.location += range.location;
        if (!MarkdownRangeFitsLength(markupStatementRange, storageLength)) continue;
        [self->textStorage addAttribute:NSForegroundColorAttributeName value:[themeController currentTextSubduedColor] range:markupStatementRange];
    }
}

@end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment