kballenegger/MarkdownGrammar.m Secret

## markdown.l
/*** Definition section ***/

%{
/* C code to be copied verbatim */
#include <stdio.h>
#include <stdlib.h>
#include "markdown.lexer.h"

// Emits the current match (yytext/yyleng) as a token of `type`.
// It first switches the scanner to the `neutral` start condition and flushes
// any pending concatenated text token, so tokens are reported in input order.
// Expands to several statements: only use it inside a braced action.
#define TOKEN(type) \
    BEGIN(neutral); \
    concat_previous_text_token(); \
    record_token(yytext, yyleng, type)


// Initial size of the text-token buffer, and the step by which it grows (bytes).
#define T_TEXT_BUFFER_SIZE_INCREMENT 1024

// When 0, update_multibytes_encountered() returns 0 and token ranges are
// reported as raw byte offsets/lengths.
#define MULTIBYTE_RANGE_ADJUSTING_ENABLED 1


// Forward declarations for the helpers defined in the C code section.
void concat_previous_text_token(void);
void append_to_text_token_buffer(char *text, unsigned long len, enum TokenType type);

int update_multibytes_encountered(char *text, unsigned long len);
void record_token(char *text, unsigned long length, enum TokenType type);

// global state
// Sum of the byte lengths of all tokens recorded so far in the current lex() call.
NSUInteger token_index = 0;
// Callback handed to lex(); record_token() invokes it once per token.
TokenLexedBlock token_callback_block = nil;

// Queue on which lex() runs the scanner; created once inside lex().
static dispatch_queue_t lexer_queue;
// Queue captured by lex() from its caller; record_token() dispatches the callback onto it.
static dispatch_queue_t calling_queue;

// Running total of the per-token adjustments returned by
// update_multibytes_encountered(); record_token() subtracts it from byte offsets.
int multibytes_encountered = 0;

// concatenation of text tokens
// Plain-text bytes are accumulated here and emitted as one token by
// concat_previous_text_token() instead of one token per matched byte.
char *last_text_token = NULL;                         // buffer holding the pending text
int last_text_token_len = 0;                          // bytes currently pending
int last_text_token_buffer_size = 0;                  // allocated size of the buffer
enum TokenType last_text_token_token_type = t_text;   // type the pending bytes will be emitted as


%}
/*** Regex definition section ***/

/* emphasis marker: a single asterisk or underscore */
em                  "*"|"_"
/* horizontal rule: three dashes, asterisks or equals signs.
   NOTE(review): the ^ and $ anchors are written outside any grouping;
   confirm how flex applies them across the whole alternation. */
rule                ^"---"|"***"|"==="$
/* list bullets: up to three leading spaces, the marker itself, then exactly
   one space or tab (which therefore becomes part of the bullet token) */
symbol_bullet       " "{0,3}("•"|"-"|"*"|"✓"|"✗")(" "|\t)
number_bullet       " "{0,3}([0-9]+("."|")"))(" "|\t)
bullet              {symbol_bullet}|{number_bullet}
/* one level of indentation: a tab or four spaces */
indent              \t|" "{4}
/* single punctuation characters reported as t_punctuation */
punctuation         "."|","|"?"|"!"|"-"|"–"|"—"|"/"|"\\"|"'"|"\""|":"|";"|"("|")"|"["|"]"|"|"

/* This tells flex to read only one input file */
%option noyywrap

/* states */
/* These are declared with %s (inclusive start conditions), so rules that
   carry no <state> prefix remain active inside every one of them. */
%s neutral
%s after_bullet
%s in_paren
%s in_bracket

%%
    /*** Rules section ***/

    /*
     * Each action either emits a token through TOKEN() (which also flushes
     * the pending text buffer and returns to the `neutral` state) or appends
     * the matched byte to the pending text buffer.
     *
     * "(" and "[" switch to in_paren / in_bracket. While in those states the
     * first four rules collect every non-newline byte as t_paren_text /
     * t_bracket_text until the closing ")" / "]" is seen. "." does not match
     * a newline, so a newline inside parentheses or brackets is handled by
     * the "\n" rule, whose TOKEN() call leaves the state.
     *
     * The final "." rule is the catch-all: anything not matched above is
     * accumulated as t_text.
     */


<in_paren>")"       { TOKEN(t_close_paren); }
<in_paren>.         { append_to_text_token_buffer(yytext, yyleng, t_paren_text); BEGIN(in_paren); }
<in_bracket>"]"     { TOKEN(t_close_bracket); }
<in_bracket>.       { append_to_text_token_buffer(yytext, yyleng, t_bracket_text); BEGIN(in_bracket); }
{rule}              { TOKEN(t_rule); }
^{bullet}           { TOKEN(t_bullet); BEGIN(after_bullet); }
{em}{3}             { TOKEN(t_strong_emphasis); }
{em}{2}             { TOKEN(t_strong); }
{em}                { TOKEN(t_emphasis); }
:                   { TOKEN(t_colon); }
```                 { TOKEN(t_triple_backtick); }
`                   { TOKEN(t_backtick); }
^{indent}           { TOKEN(t_indent); }
^">"                { TOKEN(t_quotemark); }
"!"                 { TOKEN(t_bang); }
"("                 { TOKEN(t_open_paren); BEGIN(in_paren); }
"["                 { TOKEN(t_open_bracket); BEGIN(in_bracket); }
^"-"+$              { TOKEN(t_headline_underline); }
^"="+$              { TOKEN(t_headline_double_underline); }
^"#"{1,6}           { TOKEN(t_headline_mark); }
"#"{1,6}$           { TOKEN(t_headline_mark); }
"  \n"              { TOKEN(t_hard_newline); }
"\n"                { TOKEN(t_newline); }
" "                 { TOKEN(t_space); }
{punctuation}       { TOKEN(t_punctuation); }
.                   { append_to_text_token_buffer(yytext, yyleng, t_text); }


%%
/*** C Code section ***/


// Appends `len` bytes of `text` to the pending text token.
//
// text: bytes to append (does not need to be NUL-terminated)
// len:  number of bytes to append
// type: token type the pending text will be emitted as
//
// If `type` differs from the type of the text already pending, the pending
// text is emitted first (via concat_previous_text_token()) so that a single
// token never mixes types. The buffer is kept NUL-terminated at all times.
void append_to_text_token_buffer(char *text, unsigned long len, enum TokenType type) {

    // if the type is different, commit the token
    if (type != last_text_token_token_type) concat_previous_text_token();

    // make room for the new bytes plus one byte of NUL termination; grow in
    // whole increments until it fits, so appends larger than one increment
    // cannot overflow the buffer
    unsigned long needed = (unsigned long)last_text_token_len + len + 1;
    if (last_text_token == NULL || needed > (unsigned long)last_text_token_buffer_size) {
        unsigned long new_size = (unsigned long)last_text_token_buffer_size;
        while (new_size < needed) new_size += T_TEXT_BUFFER_SIZE_INCREMENT;

        char *grown = realloc(last_text_token, new_size);
        if (grown == NULL) return; // out of memory: keep the old buffer intact and drop these bytes
        last_text_token = grown;
        last_text_token_buffer_size = (int)new_size;
    }

    // copy exactly `len` bytes, then terminate explicitly instead of relying
    // on the source having a NUL at text[len]
    for (unsigned long i = 0; i < len; i++) {
        last_text_token[last_text_token_len + i] = text[i];
    }
    last_text_token_len += (int)len;
    last_text_token[last_text_token_len] = '\0';

    // ensure type
    last_text_token_token_type = type;
}

// Reports one token to the callback installed by lex() and advances the
// running byte offset.
//
// text:   NUL-terminated token text; only valid for the duration of the callback
// length: token length in bytes
// type:   token type
//
// The reported range is the byte range shifted/shrunk by the multibyte
// adjustment, so that it can be used against the original string.
void record_token(char *text, unsigned long length, enum TokenType type) {
    // The location must use the adjustment accumulated BEFORE this token, and
    // update_multibytes_encountered() mutates that global. Compute the two
    // values in separate statements: as arguments of a single call their
    // evaluation order would be unspecified.
    NSUInteger location = token_index - multibytes_encountered;
    NSUInteger adjusted_length = length - update_multibytes_encountered(text, length);
    NSRange range = NSMakeRange(location, adjusted_length);

    // lex() blocks its caller until scanning finishes, so the callback is
    // invoked directly. Dispatching synchronously back onto the caller's
    // queue deadlocks whenever that queue is serial (including the main
    // queue), because it is busy waiting inside lex().
    if (token_callback_block) {
        token_callback_block(type, text, range);
    }

    token_index += length;
}

// Computes how many of the `len` UTF-8 bytes in `text` have no counterpart
// when the same text is indexed in UTF-16 code units (the unit used by
// NSString/NSRange), adds that number to `multibytes_encountered`, and
// returns it.
//
// Per UTF-8 sequence: 1 byte -> 1 unit (0 extra), 2 bytes -> 1 unit (1 extra),
// 3 bytes -> 1 unit (2 extra), 4 bytes -> 2 units, a surrogate pair (2 extra).
// That is: one extra per continuation byte, minus one for every 4-byte lead.
//
// Returns 0 without touching any state when MULTIBYTE_RANGE_ADJUSTING_ENABLED is 0.
int update_multibytes_encountered(char *text, unsigned long len) {
    if (!MULTIBYTE_RANGE_ADJUSTING_ENABLED) return 0;
    int count = 0;
    for (unsigned long i = 0; i < len; i++) {
        unsigned char byte = (unsigned char)text[i];
        if ((byte & 0xC0) == 0x80) {
            // 10xxxxxx: continuation byte
            count++;
        } else if ((byte & 0xF8) == 0xF0) {
            // 11110xxx: lead byte of a 4-byte sequence, which occupies two
            // UTF-16 code units rather than one
            count--;
        }
    }
    multibytes_encountered += count;
    return count;
}

// Emits the pending concatenated text (if any) as a single token and resets
// the pending state. Does nothing when no text is pending.
void concat_previous_text_token(void) {
    if (last_text_token_len==0) return;

    record_token(last_text_token, last_text_token_len, last_text_token_token_type);

    // clean up: keep the existing allocation and just mark it empty. This
    // avoids a free/allocate round trip per token and removes an allocation
    // whose failure would otherwise leave a NULL buffer behind. The buffer is
    // released by lex() once scanning is complete.
    last_text_token[0] = '\0';
    last_text_token_len = 0;
    last_text_token_token_type = t_text;
}

// Tokenizes the NUL-terminated UTF-8 string `data`, invoking `block` once per
// token, in input order. Returns after the whole input has been scanned.
//
// data:  text to scan; a NULL pointer is ignored
// block: receives (token type, token text, token range); a nil block is ignored
//
// The scanner keeps its state in globals, so all scanning is serialized on a
// private queue.
void lex(const char *data, TokenLexedBlock block) {
    // nothing to scan, or nobody to report to
    if (data == NULL || block == nil) return;

    // create global queue
    static dispatch_once_t lexer_queue_creation;
    dispatch_once(&lexer_queue_creation, ^{
        lexer_queue = dispatch_queue_create("com.azuretalon.Macchiato.lexer_queue", NULL);
    });

    // NOTE(review): dispatch_get_current_queue() is deprecated by Apple.
    dispatch_queue_t current_calling_queue = dispatch_get_current_queue();
    dispatch_sync(lexer_queue, ^{
        // reset global state
        token_callback_block = block;
        token_index = 0;
        multibytes_encountered = 0;

        calling_queue = current_calling_queue; // capture from scope

        last_text_token = calloc(T_TEXT_BUFFER_SIZE_INCREMENT, 1); // allocate a kilobyte, zero it out
        last_text_token_len = 0;
        last_text_token_token_type = t_text;
        // if the allocation failed, advertise an empty buffer so the first append allocates it
        last_text_token_buffer_size = (last_text_token ? T_TEXT_BUFFER_SIZE_INCREMENT : 0);

        // The start condition is scanner-global and survives across yylex()
        // calls; without this reset, an input that ended inside "(" or "["
        // would make the next input start in that state.
        BEGIN(INITIAL);

        // lex
        YY_BUFFER_STATE buf = yy_scan_string(data);
        yylex();
        concat_previous_text_token(); // grab any remaining text
        yy_delete_buffer(buf);

        // free
        free (last_text_token);
        last_text_token = NULL;
        last_text_token_len = 0;
        last_text_token_buffer_size = 0;

        // do not keep the caller's block (and whatever it captured) alive
        token_callback_block = nil;
    });
}

## MarkdownGrammar.m
//
//  MarkdownGrammar.m
//  Macchiato
//
//  Created by Kenneth Ballenegger on 8/2/12.
//  Copyright (c) 2012 Azure Talon. All rights reserved.
//

#import "MarkdownGrammar.h"

#import "markdown.lexer.h"


// Set to 1 to NSLog every main-loop iteration and every committed syntax
// element in MarkdownSyntaxElementsFromTokens().
#define DEBUGGING_GRAMMAR 0


// TODO: comment with example input and output


// Groups a flat list of lexer tokens into markdown syntax elements.
//
// tokens: array of dictionaries, each with
//           @"TokenType" - NSNumber wrapping an enum TokenType
//           @"Range"     - NSValue wrapping the token's NSRange
//           @"Text"      - NSString with the token's text
//
// Returns an array of mutable dictionaries, each with
//           @"Range"             - NSValue wrapping the element's NSRange
//           @"SyntaxElementType" - NSNumber wrapping an enum SyntaxElementType
//         and, depending on the element:
//           @"MakupTokenRanges"  - ranges of the markup tokens themselves
//           @"Name" / @"URL"     - link name / link target
//
// The function is a single pass over the tokens. Each case of the switch
// looks at an opening token, scans forward (or, for underlined headlines,
// backward) for the rest of the element, and on success commits the element
// and jumps ahead with CONTINUE_AT(). A case that `break`s without
// committing leaves the token to be treated as plain text.
//
// Throughout, a token type of 0 means "no token at that index", i.e. the
// index lies before the start or past the end of the input.
NSArray * MarkdownSyntaxElementsFromTokens(NSArray *tokens) {


    NSMutableArray *syntaxElements = [NSMutableArray array];

    NSUInteger count = [tokens count];

// Declares <var>_token / <var>_type for the token at `index`, or nil / 0 when
// the index is out of bounds.
#define PREPARE_TOKEN(index, var)                                       \
    NSDictionary *var##_token; enum TokenType var##_type;               \
    if ((index) >= 0 && (NSUInteger)(index) < count) {                  \
        var##_token = [tokens objectAtIndex:(index)];                   \
        var##_type = [var##_token[@"TokenType"] intValue];              \
    } else {                                                            \
        var##_token = nil;                                              \
        var##_type = 0;                                                 \
    }                                                                   // END OF MACRO

// Declares <var>_range from a previously prepared <var>_token. The range is
// {0, 0} when there is no such token, so it is never read uninitialized.
#define PREPARE_RANGE_FROM_TOKEN(var)                                   \
    NSRange var##_range = NSMakeRange(0, 0);                            \
    if (var##_token)                                                    \
        var##_range = [var##_token[@"Range"] rangeValue];               // END OF MACRO

// Declares `toRange` as the range of token <var>, or of the token before it
// when <var> is past the end of the input. `var` must be an int variable with
// matching <var>_token / <var>_type already prepared.
#define PREPARE_TO_RANGE_FROM_TOKEN_OR_PREVIOUS(var)                    \
    NSRange toRange;                                                    \
    if (var##_type) {                                                   \
        toRange = [var##_token[@"Range"] rangeValue];                   \
    } else {                                                            \
        PREPARE_TOKEN(var-1, toRange_##var##_m1);                       \
        toRange = [toRange_##var##_m1_token[@"Range"] rangeValue];      \
    }                                                                   // END OF MACRO

// Range starting at the start of `first` and ending at the end of `second`.
#define BRIDGE_RANGES(first, second)                                    \
    NSMakeRange(first.location,                                         \
        second.location + second.length - first.location)               // END OF MACRO

// Marks token j as the last one consumed: the main loop resumes at j+1.
#define CONTINUE_AT(j)                                                  \
    i = j;                                                              \
    goto next_syntax_element;                                           // END OF MACRO

// Appends a syntax element to the result; `extra` (may be nil) is merged in.
#define COMMIT_ELEMENT(range, type, extra)                              \
{ /* guard scope for var declaration */                                 \
    NSMutableDictionary *metaDict =                                     \
        [NSMutableDictionary dictionaryWithDictionary:                  \
         @{@"Range": [NSValue valueWithRange:range],                    \
           @"SyntaxElementType": [NSNumber numberWithInt:type]}];       \
    if (extra) [metaDict addEntriesFromDictionary:extra];               \
    [syntaxElements addObject:metaDict];                                \
    if (DEBUGGING_GRAMMAR) NSLog(@"found token type %d", type);         \
}                                                                       // END OF MACRO


// Switch case for inline elements delimited by the same token on both sides
// (code, emphasis, strong...). The closing token must be on the same line.
#define CASE_SIMPLE_PAIR_FOR_TYPE(t_type, s_type)                       \
    case t_type: {                                                      \
        for (int j = i+1; j < count; j++) {                             \
            PREPARE_TOKEN(j, j);                                        \
            if (j_type == t_type) {                                     \
                /* next must be space, newline or punctuation */        \
                PREPARE_TOKEN(j+1, j_p1);                               \
                if (!(j_p1_type == t_space ||                           \
                    j_p1_type == t_newline ||                           \
                    j_p1_type == t_punctuation)) continue;              \
                PREPARE_RANGE_FROM_TOKEN(j);                            \
                NSRange commitRange = BRIDGE_RANGES(i_range, j_range);  \
                NSDictionary *metaDictionary =                          \
                    @{@"MakupTokenRanges": @[                           \
                        [NSValue valueWithRange:i_range],               \
                        [NSValue valueWithRange:j_range]]};             \
                COMMIT_ELEMENT(commitRange, s_type, metaDictionary);    \
                CONTINUE_AT(j);                                         \
            } else if (j_type == t_newline) {                           \
                /* invalid, fallback to text */                         \
                break;                                                  \
            }                                                           \
        }                                                               \
    } break;                                                            // END OF MACRO

// Leaves the current case unless token i opens a paragraph, i.e. each of the
// two preceding tokens is a newline or does not exist.
#define CHECK_ENTERS_AS_PARAGRAPH                                       \
    PREPARE_TOKEN(i-1, i_m1);                                           \
    PREPARE_TOKEN(i-2, i_m2);                                           \
    if (!((i_m1_type == t_newline || i_m1_type == 0) &&                 \
        (i_m2_type == t_newline || i_m2_type == 0))) break;             // END OF MACRO


#pragma mark -
    // LOOP STARTS HERE

    // for list items, we need to keep track of the last token that was part of a list item
    int lastCommittedListItemToken = -1;


    for (int i = 0; i < count; i++) {
        PREPARE_TOKEN(i, i);
        PREPARE_RANGE_FROM_TOKEN(i);

        if (DEBUGGING_GRAMMAR) NSLog(@"big loop iteration %d", i);


        // figure out which opening token it is

        switch (i_type) {
#pragma mark inline code
            CASE_SIMPLE_PAIR_FOR_TYPE(t_backtick, s_code);
#pragma mark emphasis
            CASE_SIMPLE_PAIR_FOR_TYPE(t_emphasis, s_emphasis);
#pragma mark strong
            CASE_SIMPLE_PAIR_FOR_TYPE(t_strong, s_strong);
#pragma mark strong emphasis
            CASE_SIMPLE_PAIR_FOR_TYPE(t_strong_emphasis, s_strong_emphasis);

#pragma mark code block
            // code block
            case t_indent: {
                // a code block must be:
                // - preceded by two newlines (or nothingness)
                // - followed by two newline (or nothingness)

                CHECK_ENTERS_AS_PARAGRAPH;

                // we go to count+1 so we can catch a code block at the very end of the string
                for (int j = i+1; j < count+1; j++) {
                    PREPARE_TOKEN(j, j);

                    // looking for the newline, still part of code block
                    if (!(j_type == t_newline || j_type == 0)) continue;

                    // look ahead to check if next token is also newline
                    PREPARE_TOKEN(j+1, j_p1);
                    if (j_p1_type == t_newline || j_p1_type == 0) {
                        PREPARE_TO_RANGE_FROM_TOKEN_OR_PREVIOUS(j);
                        NSRange commitRange = BRIDGE_RANGES(i_range, toRange);
                        COMMIT_ELEMENT(commitRange, s_code_block, nil);
                        CONTINUE_AT(j);
                    } else if (j_p1_type != t_indent) {
                        break; // a newline followed by something that isn't an indent means invalid code block
                    }
                }
            } break;

#pragma mark blockquote
            case t_quotemark: {
                // a quotation block must be:
                // - preceded by two newlines (or nothingness)
                // - followed by two newline (or nothingness)

                CHECK_ENTERS_AS_PARAGRAPH;

                NSMutableArray *markupTokenRanges = [NSMutableArray arrayWithObject:i_token[@"Range"]];

                // we go to count+1 so we can catch a blockquote at the very end of the string
                for (int j = i+1; j < count+1; j++) {
                    PREPARE_TOKEN(j, j);

                    if (j_type == t_quotemark) [markupTokenRanges addObject:j_token[@"Range"]];

                    // looking for the newline, still part of blockquote
                    if (!(j_type == t_newline || j_type == 0)) continue;

                    // look ahead to check if next token is also newline
                    PREPARE_TOKEN(j+1, j_p1);
                    if (j_p1_type == t_newline || j_p1_type == 0) {
                        PREPARE_TO_RANGE_FROM_TOKEN_OR_PREVIOUS(j);
                        NSRange commitRange = BRIDGE_RANGES(i_range, toRange);
                        COMMIT_ELEMENT(commitRange, s_blockquote, @{@"MakupTokenRanges": markupTokenRanges});
                        CONTINUE_AT(j);
                    } else if (j_p1_type != t_quotemark) {
                        break; // a newline followed by something that isn't a quotemark means invalid blockquote
                    }
                }
            } break;

#pragma mark headlines
            case t_headline_mark: {
                // a regular headline must be:
                // - preceded by two newlines (or nothingness)
                // - followed by two newline (or nothingness)

                NSMutableArray *markupTokenRanges = [NSMutableArray arrayWithObject:i_token[@"Range"]];

                CHECK_ENTERS_AS_PARAGRAPH;

                // we go to count+1 so we can catch a headline at the very end of the string
                for (int j = i+1; j < count+1; j++) {
                    PREPARE_TOKEN(j, j);

                    // looking for the newline, still part of the headline
                    if (!(j_type == t_newline || j_type == 0)) continue;

                    // look ahead to check if next token is also newline
                    PREPARE_TOKEN(j+1, j_p1);
                    if ((j_p1_type == t_newline || j_p1_type == 0)) {

                        // a closing run of #s right before the newline is markup, too
                        PREPARE_TOKEN(j-1, j_m1);
                        if (j_m1_type == t_headline_mark)
                            [markupTokenRanges addObject:j_m1_token[@"Range"]];

                        PREPARE_TO_RANGE_FROM_TOKEN_OR_PREVIOUS(j);
                        NSRange commitRange = BRIDGE_RANGES(i_range, toRange);
                        // the headline level is the number of #s in the opening mark, clamped to 1...6
                        enum SyntaxElementType commitType = (enum SyntaxElementType)((int)s_headline_1 + (MAX(1, MIN(6, [i_token[@"Text"] length])) - 1));

                        COMMIT_ELEMENT(commitRange, commitType, @{@"MakupTokenRanges": markupTokenRanges});
                        CONTINUE_AT(j);
                    } else {
                        break; // this means a multi-line headline, which is not allowed
                    }
                }
            } break;

#pragma mark github code block
            case t_triple_backtick: {
                // a github-style code block is a paragraph

                CHECK_ENTERS_AS_PARAGRAPH;

                for (int j = i+1; j < count; j++) {
                    PREPARE_TOKEN(j, j);

                    // iterate until we find the matching triple backtick
                    if (j_type != t_triple_backtick) continue;

                    // look ahead to check if next two tokens are newlines
                    PREPARE_TOKEN(j+1, j_p1);
                    PREPARE_TOKEN(j+2, j_p2);
                    if ((j_p1_type == t_newline || j_p1_type == 0) &&
                        (j_p2_type == t_newline || j_p2_type == 0)) {

                        // the macro below needs an int variable named after the
                        // prepared token so it can fall back to index j_p1-1
                        int j_p1 = j+1;
                        PREPARE_TO_RANGE_FROM_TOKEN_OR_PREVIOUS(j_p1);
                        NSRange commitRange = BRIDGE_RANGES(i_range, toRange);

                        NSDictionary *metaDictionary = @{@"MakupTokenRanges": @[i_token[@"Range"], j_token[@"Range"]]};

                        COMMIT_ELEMENT(commitRange, s_code_block, metaDictionary);
                        CONTINUE_AT(j+1);
                    }
                }
            } break;

#pragma mark underlined headline
            case t_headline_underline: case t_headline_double_underline: {

                // previous token MUST be newline
                PREPARE_TOKEN(i-1, i_m1);
                if (i_m1_type != t_newline) break;

                // if no preceding newline found, assume zero -- this deals with headline at start of file
                int startIndex = 0;

                // loop backwards to find the actual headline
                for (int j = i-2; j >= 0; j--) {
                    PREPARE_TOKEN(j, j);

                    // i'm looking for a preceding newline to find beginning of previous line
                    if (j_type == t_newline) {
                        startIndex = j+1;
                        break;
                    }
                }

                // verify that it starts as paragraph, or on bounds
                PREPARE_TOKEN(startIndex-1, s_m1);
                if (!(s_m1_type == t_newline || s_m1_type == 0)) break;

                PREPARE_TOKEN(startIndex-2, s_m2);
                if (!(s_m2_type == t_newline || s_m2_type == 0)) break;

                // verify that it's followed by paragraph end
                PREPARE_TOKEN(i+1, i_p1);
                if (!(i_p1_type == t_newline || i_p1_type == 0)) break;

                PREPARE_TOKEN(i+2, i_p2);
                if (!(i_p2_type == t_newline || i_p2_type == 0)) break;

                // if we got to here without breaking, we're a valid match
                PREPARE_TOKEN(startIndex, start);
                NSRange start_range = [start_token[@"Range"] rangeValue];
                // The element runs through the newline after the underline. When the
                // underline is the last token of the input there is no such newline,
                // so end at the underline itself.
                NSRange end_range = (i_p1_token ? [i_p1_token[@"Range"] rangeValue] : i_range);
                NSRange commitRange = BRIDGE_RANGES(start_range, end_range);

                enum SyntaxElementType commitType;
                if (i_type == t_headline_double_underline)
                    commitType = s_headline_1;
                else
                    commitType = s_headline_2;
                COMMIT_ELEMENT(commitRange, commitType, @{@"MakupTokenRanges": @[i_token[@"Range"]]});
                CONTINUE_AT(i+1); // eat newline

            } break;

#pragma mark rule
            case t_rule: {
                CHECK_ENTERS_AS_PARAGRAPH;
                PREPARE_TOKEN(i+1, i_p1);
                PREPARE_TOKEN(i+2, i_p2);
                if ((i_p1_type == t_newline || i_p1_type == 0) &&
                        (i_p2_type == t_newline || i_p2_type == 0)) {
                    COMMIT_ELEMENT(i_range, s_rule, nil);
                    CONTINUE_AT(i+1);
                }
            } break;

#pragma mark links & images
            case t_open_bracket: {

                // seek end of link text
                int linkTextEnd = -1;
                for (int j = i+1; j < count; j++) {
                    PREPARE_TOKEN(j, j);
                    if (j_type == t_close_bracket) {
                        // must be followed by [, (, or :, else invalid
                        PREPARE_TOKEN(j+1, j_p1);
                        if (!(j_p1_type == t_open_bracket ||
                              j_p1_type == t_open_paren ||
                              j_p1_type == t_colon)) break;

                        // found!
                        linkTextEnd = j; break;
                    } else if (j_type == t_newline) {
                        // invalid, fallback to text
                        break;
                    }
                }
                if (linkTextEnd < 0) break; // end not found, this is not a link

                // now that we have end of link text, find rest of link
                PREPARE_TOKEN(linkTextEnd+1, lte_p1); // prepare "link target open" token: [, ( or :

                if (lte_p1_type == t_colon) {
                    // we've got a link definition
                    // now we've gotta check how the grammar element started, definitions need to be one per line
                    PREPARE_TOKEN(i-1, i_m1);
                    // also, t_colon needs to be followed by a space
                    PREPARE_TOKEN(linkTextEnd+2, lte_p2);

                    if (!(i_m1_type == t_newline || i_m1_type == 0) ||
                        lte_p2_type != t_space) {
                        // fail
                        CONTINUE_AT(i+1);
                    }

                    // we go to count+1 so a definition on the last line, with no
                    // trailing newline, is terminated by the end of the input
                    for (int j = linkTextEnd+3; j < count+1; j++) {
                        PREPARE_TOKEN(j, j);

                        // looking for newline or end of file
                        if (j_type == t_newline || j_type == 0) {

                            // we're good!
                            PREPARE_TO_RANGE_FROM_TOKEN_OR_PREVIOUS(j);
                            NSRange commitRange = BRIDGE_RANGES(i_range, toRange);

                            // link name: text of every token between the brackets
                            NSMutableString *name = [[NSMutableString alloc] init];
                            for (int k = i+1; k <= linkTextEnd-1; k++) {
                                PREPARE_TOKEN(k, k);
                                [name appendString:k_token[@"Text"]];
                            }


                            // url: text of every token after ": " up to the end of the line
                            NSMutableString *url = [[NSMutableString alloc] init];
                            for (int k = linkTextEnd+3; k <= j-1; k++) {
                                PREPARE_TOKEN(k, k);
                                [url appendString:k_token[@"Text"]];
                            }

                            NSDictionary *metaDictionary = @{
                                @"Name": [NSString stringWithString:name],
                                @"URL": [NSString stringWithString:url]
                            };

                            COMMIT_ELEMENT(commitRange, s_link_info, metaDictionary);
                            CONTINUE_AT(j);
                        }
                    }

                } else if (lte_p1_type == t_open_bracket || lte_p1_type == t_open_paren) {
                    // we've got an inline link

                    for (int j = linkTextEnd+2; j < count; j++) {
                        PREPARE_TOKEN(j, j);
                        if ((lte_p1_type == t_open_bracket && j_type == t_close_bracket) ||
                            (lte_p1_type == t_open_paren && j_type == t_close_paren)) {
                            // found the closing ] or )

                            // [text][name] refers to a definition, [text](url) carries the URL itself
                            NSString *nameOrURLKey = (lte_p1_type == t_open_bracket ? @"Name" : @"URL");

                            // actually, it may be an image if it's preceded by a bang!
                            PREPARE_TOKEN(i-1, i_m1);
                            BOOL isImage = (i_m1_type == t_bang && lte_p1_type == t_open_paren);
                            enum SyntaxElementType commitType = (isImage ? s_image : s_link);
                            PREPARE_RANGE_FROM_TOKEN(i_m1);
                            NSRange fromRange = (isImage ? i_m1_range : i_range);

                            // we're good!
                            PREPARE_RANGE_FROM_TOKEN(j);
                            NSRange commitRange = BRIDGE_RANGES(fromRange, j_range);

                            // [text][] is shorthand: the link text doubles as the name
                            NSMutableString *linkValue = [[NSMutableString alloc] init];
                            BOOL isEmptyLink = (linkTextEnd+2 == j && lte_p1_type == t_open_bracket && j_type == t_close_bracket);
                            int linkValueFrom = isEmptyLink ? i+1 : linkTextEnd+2;
                            int linkValueTo = isEmptyLink ? linkTextEnd-1 : j-1;
                            for (int k = linkValueFrom; k <= linkValueTo; k++) {
                                PREPARE_TOKEN(k, k);
                                [linkValue appendString:k_token[@"Text"]];
                            }

                            COMMIT_ELEMENT(commitRange, commitType, @{nameOrURLKey: [NSString stringWithString:linkValue]});
                            CONTINUE_AT(j);
                        } else if (j_type == t_newline) {
                            // invalid, fallback to text
                            break;
                        }
                    }
                }

            } break;

#pragma mark list items
            case t_bullet: {

                // enforce paragraph entrance, unless we're already inside a list
                if (i-1 != lastCommittedListItemToken) {
                    CHECK_ENTERS_AS_PARAGRAPH;
                }

                // we go to count+1 so we can catch a list item at the very end of the string
                for (int j = i+1; j < count+1; j++) {

                    PREPARE_TOKEN(j, j);
                    // looking for newline or EOF
                    if (!(j_type == t_newline || j_type == 0)) continue;

                    PREPARE_TOKEN(j+1, j_p1);
                    // next token can be a paragraph end or another bullet
                    // Note: whether that next bullet is itself a valid list item is not known
                    // at this point; it is accepted as a terminator regardless
                    if (j_p1_type == t_newline || j_p1_type == t_bullet || j_p1_type == 0) {
                        PREPARE_TO_RANGE_FROM_TOKEN_OR_PREVIOUS(j);
                        NSRange commitRange = BRIDGE_RANGES(i_range, toRange);
                        COMMIT_ELEMENT(commitRange, s_list_item, @{@"MakupTokenRanges": @[i_token[@"Range"]]});
                        lastCommittedListItemToken = j;
                        // we continue inside the list item, so we can parse inline styles, too.
                        // Only the bullet itself is consumed: the bullet token already includes
                        // its trailing space, so token i+1 is the first token of the content.
                        CONTINUE_AT(i);
                    } else if (j_p1_type == t_indent) {
                        continue; // next line being an indent means it's part of the list item
                    } else if (j_p1_type == t_space) {
                        // if it's a space, we'll also allow a combination of X space, where X is the length of the list item bullet
                        BOOL valid = YES;
                        int k = j+2;
                        for (; k <= j+i_range.length; k++) {
                            PREPARE_TOKEN(k, k);
                            if (k_type == t_space) continue;
                            // else invalid
                            valid = NO;
                            break;
                        }
                        if (valid) {
                            j = k;
                            continue; // valid
                        } else {
                            break; // invalid
                        }
                    } else {
                        break; // invalid, discard entire item and move on
                    }
                }

            } break;

#pragma mark - default
            case t_text: default: {
                // do nothing
            } break;
        }

    next_syntax_element: continue;
    }


    return syntaxElements;
}

## MarkdownSyntaxHighlighter.m
//
//  MarkdownSyntaxHighlighter.m
//  Macchiato
//
//  Created by Kenneth Ballenegger on 7/24/12.
//  Copyright (c) 2012 Azure Talon. All rights reserved.
//

#import "MarkdownSyntaxHighlighter.h"
#import "MacchiatoThemeController.h"

#import "markdown.lexer.h"
#import "MarkdownGrammar.h"


// File-private string constants. NOTE(review): their use is not visible in
// this part of the file; presumably they are custom text-attribute names
// under which the token / syntax element type is stored -- confirm below.
static NSString *const TokenTypeAttributeName = @"TokenTypeAttributeName";
static NSString *const SyntaxElementTypeAttributeName = @"SyntaxElementTypeAttributeName";


// Private class extension: declares NSLayoutManagerDelegate conformance and
// the highlighter's only instance variable.
@interface MarkdownSyntaxHighlighter () <NSLayoutManagerDelegate> {
    @private
    // The text storage whose contents are lexed and whose attributes are
    // updated; set once in -initWithTextStorage:.
    NSTextStorage *textStorage;
}

@end


@implementation MarkdownSyntaxHighlighter

// Creates a highlighter bound to `storage`, the text storage it will read
// from and apply attributes to. Returns nil if the superclass fails to
// initialize.
- (id)initWithTextStorage:(NSTextStorage *)storage {
    self = [super init];
    if (self == nil) {
        return nil;
    }

    self->textStorage = storage;
    return self;
}

// Unsupported initializer: a highlighter is useless without a text storage,
// so this logs a message and terminates the process. Always use
// -initWithTextStorage: instead.
- (id)init {
    NSLog(@"You should only be using initWithTextStorage:.");
    abort(); // never returns, hence no return statement
}


// Lexes and parses the text in `range` of the text storage, then applies the
// resulting highlighting to that range.
//
// range: range of the text storage to (re)highlight. Token and syntax
//        element ranges are relative to the start of this range.
//
// Lexing and parsing happen on the calling thread; every attribute change is
// made on the main thread. The method returns once highlighting is applied.
- (void)highlightRange:(NSRange)range {

    if (range.location == 0 && range.length == 0)
        return; // don't highlight an empty document, it's buggy

    NSMutableArray *tokens = [NSMutableArray array];


    NSString *substringToLex = [self->textStorage.string substringWithRange:range];

    // the conversion returns NULL when the text cannot be represented
    // (e.g. an unpaired surrogate); there is nothing sensible to lex then
    const char *utf8ToLex = [substringToLex cStringUsingEncoding:NSUTF8StringEncoding];
    if (utf8ToLex == NULL)
        return;

    lex(utf8ToLex, ^(enum TokenType ttype, const char *ttext, NSRange trange) {

        // a dictionary literal throws on nil values, so never store a nil text
        NSString *text = (ttext != NULL ? [NSString stringWithUTF8String:ttext] : nil);
        if (text == nil) text = @"";

        [tokens addObject:@{@"TokenType": [NSNumber numberWithInt:ttype], @"Range": [NSValue valueWithRange:trange], @"Text": text}];

    });

    NSArray *syntaxElements = MarkdownSyntaxElementsFromTokens(tokens);

    dispatch_block_t applyHighlighting = ^{

        // reset attributes globally by styling the whole range as default text
        [self highlightSyntaxElement:@{@"Range": [NSValue valueWithRange:NSMakeRange(0, range.length)], @"SyntaxElementType": [NSNumber numberWithInt:s_default]} inRange:range];

        // set attributes for syntax element tokens
        for (NSDictionary *syntaxElement in syntaxElements) {

            [self highlightSyntaxElement:syntaxElement inRange:range];
        }

        // debug mode: tint the background by type.
        // 1 = one color per lexer token, 2 = one color per syntax element

        NSUserDefaults *defaults = [NSUserDefaults standardUserDefaults];
        NSInteger debugMode = [defaults integerForKey:@"MacchiatoDebugMode"];
        if (debugMode == 1) {
            for (NSDictionary *token in tokens) {
                float f = ((int)[token[@"TokenType"] intValue] - 10) / 20.0f;
                NSColor *color = [NSColor colorWithCalibratedHue:f saturation:1 brightness:1 alpha:.7];
                NSRange absoluteRange = [token[@"Range"] rangeValue];
                absoluteRange.location += range.location; // relative -> text storage coordinates
                [self->textStorage addAttribute:NSBackgroundColorAttributeName value:color range:absoluteRange];
            }
        } else if (debugMode == 2) {
            [self->textStorage addAttribute:NSBackgroundColorAttributeName value:[NSColor colorWithCalibratedHue:(s_default - 10) / 20.0f saturation:1 brightness:1 alpha:.7] range:range];
            for (NSDictionary *syntaxElement in syntaxElements) {
                float f = ((int)[syntaxElement[@"SyntaxElementType"] intValue] - 10) / 20.0f;
                NSColor *color = [NSColor colorWithCalibratedHue:f saturation:1 brightness:1 alpha:.7];
                NSRange absoluteRange = [syntaxElement[@"Range"] rangeValue];
                absoluteRange.location += range.location; // relative -> text storage coordinates
                [self->textStorage addAttribute:NSBackgroundColorAttributeName value:color range:absoluteRange];
            }
        }
    };

    // this should be done on main thread. dispatch_sync onto the main queue
    // from the main thread itself would wait forever, so run inline there.
    if ([NSThread isMainThread]) {
        applyHighlighting();
    } else {
        dispatch_sync(dispatch_get_main_queue(), applyHighlighting);
    }
}

- (void)highlightSyntaxElement:(NSDictionary *)syntaxElement inRange:(NSRange)range {

    enum SyntaxElementType stype = ((NSNumber *)syntaxElement[@"SyntaxElementType"]).intValue;

    NSMutableDictionary *attrs = [NSMutableDictionary dictionary];

    MacchiatoThemeController *themeController = [MacchiatoThemeController sharedController];

    // use defaults
    attrs[NSFontAttributeName] = [themeController currentMainFont];
    attrs[NSForegroundColorAttributeName] = [themeController currentTextForegroundColor];


#define CASE_HEADLINE_TYPE(index) case s_headline_##index: { attrs[NSFontAttributeName] = [[NSFontManager sharedFontManager] convertFont:[[NSFontManager sharedFontManager] convertFont:[themeController currentMainFont] toHaveTrait:NSFontBoldTrait] toSize:20.0f - index]; } break;

    switch (stype) {
        case s_emphasis: {
            attrs[NSFontAttributeName] = [[NSFontManager sharedFontManager] convertFont:[themeController currentMainFont] toHaveTrait:NSFontItalicTrait];
        } break;
        case s_strong: {
            attrs[NSFontAttributeName] = [[NSFontManager sharedFontManager] convertFont:[themeController currentMainFont] toHaveTrait:NSFontBoldTrait];
        } break;
        case s_strong_emphasis: {
            attrs[NSFontAttributeName] = [[NSFontManager sharedFontManager] convertFont:[[NSFontManager sharedFontManager] convertFont:[themeController currentMainFont] toHaveTrait:NSFontItalicTrait] toHaveTrait:NSFontBoldTrait];
        } break;
        case s_code:
        case s_code_block: {
            attrs[NSFontAttributeName] = [themeController currentMonospacedFont];
        } break;
        case s_blockquote: {
            attrs[NSFontAttributeName] = [themeController currentQuoteFont];
        } break;
        case s_link:
        case s_image:
        case s_link_info: {
            attrs[NSUnderlineStyleAttributeName] = [NSNumber numberWithInt:NSSingleUnderlineStyle];
            attrs[NSUnderlineColorAttributeName] = [themeController currentUnderlineColor];
            attrs[NSCursorAttributeName] = [NSCursor pointingHandCursor];
            if (stype == s_link) {
                if (syntaxElement[@"URL"]) {
                    attrs[@"MarkdownClickAction"] = @(MarkdownClickActionOpenURL);
                    attrs[@"URL"] = syntaxElement[@"URL"];
                } else if (syntaxElement[@"Name"]) {
                    attrs[@"MarkdownClickAction"] = @(MarkdownClickActionOpenNamedLink);
                    attrs[@"Name"] = syntaxElement[@"Name"];
                }
            } else {
                attrs[@"MarkdownClickAction"] = @(MarkdownClickActionRenderImage);
                attrs[@"URL"] = syntaxElement[@"URL"];
            }
        } break;
        case s_rule: {
            NSMutableParagraphStyle *paragraphStyle = [[themeController currentParagraphStyle] mutableCopy];
            [paragraphStyle setAlignment:NSCenterTextAlignment];
            attrs[NSParagraphStyleAttributeName] = paragraphStyle;
        } break;
        CASE_HEADLINE_TYPE(1);
        CASE_HEADLINE_TYPE(2);
        CASE_HEADLINE_TYPE(3);
        CASE_HEADLINE_TYPE(4);
        CASE_HEADLINE_TYPE(5);
        CASE_HEADLINE_TYPE(6);
        case s_default: default: {
            // do nothing
        } break;
    }

    // callback on named link info
    if (stype == s_link_info && self.namedLinkCallback) {
        self.namedLinkCallback(syntaxElement[@"Name"], syntaxElement[@"URL"]);
    }

    NSRange absoluteRange = [syntaxElement[@"Range"] rangeValue];
    absoluteRange.location += range.location;

    attrs[@"SyntexElementRange"] = [NSValue valueWithRange:absoluteRange];

    [self->textStorage setAttributes:attrs range:absoluteRange];

    for (NSValue *markupStatementRangeValue in syntaxElement[@"MakupTokenRanges"]) {
        NSRange markupStatementRange = markupStatementRangeValue.rangeValue;
        markupStatementRange.location += range.location;

        [self->textStorage addAttribute:NSForegroundColorAttributeName value:[themeController currentTextSubduedColor] range:markupStatementRange];
    }
}


@end
	/* Definition section */

	%{
	/* C code to be copied verbatim */
	#include <stdio.h>
	#include <stdlib.h>
	#include "markdown.lexer.h"

	// Emits a non-text token: returns the scanner to the `neutral` state, flushes any
	// buffered text token first, then records the current match (yytext/yyleng).
	// Expands to three statements, so it must only be used inside a braced action.
	#define TOKEN(type) \
	BEGIN(neutral); \
	concat_previous_text_token(); \
	record_token(yytext, yyleng, type)


	// The text buffer is allocated with, and grown by, this many bytes.
	#define T_TEXT_BUFFER_SIZE_INCREMENT 1024

	// When 0, update_multibytes_encountered() returns 0 and token ranges stay in raw bytes.
	#define MULTIBYTE_RANGE_ADJUSTING_ENABLED 1


	void concat_previous_text_token(void);
	void append_to_text_token_buffer(char *text, unsigned long len, enum TokenType type);

	int update_multibytes_encountered(char *text, unsigned long len);
	void record_token(char *text, unsigned long length, enum TokenType type);

	// global state
	// token_index: byte offset of the next token within the string being lexed
	NSUInteger token_index = 0;
	// block invoked by record_token() for every token
	TokenLexedBlock token_callback_block = nil;

	// lexer_queue: queue on which lex() runs the scanner; calling_queue: queue on which
	// the token callback is invoked (both assigned in lex())
	static dispatch_queue_t lexer_queue;
	static dispatch_queue_t calling_queue;

	// running adjustment subtracted from byte offsets when building token ranges
	int multibytes_encountered = 0;

	// concatenation of text tokens
	// consecutive single-character matches of the same type are accumulated here and
	// emitted as one token by concat_previous_text_token()
	char *last_text_token = NULL;
	int last_text_token_len = 0;
	int last_text_token_buffer_size = 0;
	enum TokenType last_text_token_token_type = t_text;







	%}
	/*** Regex definition section ***/

	/* Alternatives are separated by a bare `|`. An escaped `\|` would match a literal pipe character instead. */
	em                  "*"|"_"
	rule                ^"---"|"***"|"==="$
	symbol_bullet       " "{0,3}("•"|"-"|"*"|"✓"|"✗")(" "|\t)
	number_bullet       " "{0,3}([0-9]+("."|")"))(" "|\t)
	bullet              {symbol_bullet}|{number_bullet}
	indent              \t|" "{4}
	punctuation         "."|","|"?"|"!"|"-"|"–"|"—"|"/"|"\\"|"'"|"\""|":"|";"|"("|")"|"["|"]"|"|"

	/* This tells flex to read only one input file */
	%option noyywrap

	/* states */
	%s neutral
	%s after_bullet
	%s in_paren
	%s in_bracket

	%%
	/* Rules section */
	    /* All start conditions are inclusive (%s), so the unprefixed rules below are also */
	    /* active inside in_paren / in_bracket. flex prefers the longest match and, on a   */
	    /* tie, the rule listed first, which is why the state-specific rules come first.   */


	    /* Inside ( ... ) and [ ... ]: the closing character ends the state (TOKEN resets   */
	    /* to neutral); any other single character is buffered as paren / bracket text.     */
	<in_paren>")" { TOKEN(t_close_paren); }
	<in_paren>. { append_to_text_token_buffer(yytext, yyleng, t_paren_text); BEGIN(in_paren); }
	<in_bracket>"]" { TOKEN(t_close_bracket); }
	<in_bracket>. { append_to_text_token_buffer(yytext, yyleng, t_bracket_text); BEGIN(in_bracket); }
	{rule} { TOKEN(t_rule); }
	    /* BEGIN(after_bullet) runs after TOKEN and therefore overrides its BEGIN(neutral). */
	^{bullet} { TOKEN(t_bullet); BEGIN(after_bullet); }
	{em}{3} { TOKEN(t_strong_emphasis); }
	{em}{2} { TOKEN(t_strong); }
	{em} { TOKEN(t_emphasis); }
	: { TOKEN(t_colon); }
	``` { TOKEN(t_triple_backtick); }
	` { TOKEN(t_backtick); }
	^{indent} { TOKEN(t_indent); }
	^">" { TOKEN(t_quotemark); }
	"!" { TOKEN(t_bang); }
	"(" { TOKEN(t_open_paren); BEGIN(in_paren); }
	"[" { TOKEN(t_open_bracket); BEGIN(in_bracket); }
	^"-"+$ { TOKEN(t_headline_underline); }
	^"="+$ { TOKEN(t_headline_double_underline); }
	    /* Headline marks are recognised at the start of a line and at the end of a line. */
	^"#"{1,6} { TOKEN(t_headline_mark); }
	"#"{1,6}$ { TOKEN(t_headline_mark); }
	    /* A single space directly before the newline yields a hard newline token. */
	" \n" { TOKEN(t_hard_newline); }
	"\n" { TOKEN(t_newline); }
	" " { TOKEN(t_space); }
	{punctuation} { TOKEN(t_punctuation); }
	    /* Fallback: any other single character is accumulated into a text token. */
	. { append_to_text_token_buffer(yytext, yyleng, t_text); }









	%%
	/* C Code section */


	// Appends `len` bytes of `text` to the pending text token of kind `type`.
	// If a token of a different kind is pending it is emitted first. The buffer is
	// grown as needed and is always kept NUL-terminated.
	void append_to_text_token_buffer(char *text, unsigned long len, enum TokenType type) {

	    // if the type is different, commit the token
	    if (type != last_text_token_token_type) concat_previous_text_token();

	    // reallocate memory if necessary; >= keeps one spare byte for the NUL terminator.
	    // Loop rather than grow once, so an append longer than one increment still fits.
	    while ((unsigned long)last_text_token_len + len >= (unsigned long)last_text_token_buffer_size) {
	        int grown_size = last_text_token_buffer_size + T_TEXT_BUFFER_SIZE_INCREMENT;
	        char *grown_buffer = realloc(last_text_token, grown_size);
	        if (grown_buffer == NULL) return; // out of memory: drop the text rather than write through NULL
	        last_text_token = grown_buffer;
	        last_text_token_buffer_size = grown_size;
	    }

	    // write data
	    for (unsigned long i = 0; i < len; i++) {
	        *(last_text_token + last_text_token_len + i) = *(text + i);
	    }
	    last_text_token_len += (int)len;
	    *(last_text_token + last_text_token_len) = '\0'; // realloc'ed memory is not zeroed

	    // ensure type
	    last_text_token_token_type = type;
	}

	// Reports one token to the client block (synchronously, on the calling queue) and
	// advances the byte offset. `text`/`length` are in bytes; the reported range is
	// shifted and shortened by the multibyte adjustment.
	void record_token(char *text, unsigned long length, enum TokenType type) {
	    // The location must use the adjustment accumulated BEFORE this token, while
	    // update_multibytes_encountered() modifies that same global. C leaves the order
	    // of evaluation of function arguments unspecified, so sequence the two explicitly.
	    NSUInteger location = token_index - multibytes_encountered;
	    NSUInteger adjusted_length = length - update_multibytes_encountered(text, length);
	    NSRange token_range = NSMakeRange(location, adjusted_length);

	    dispatch_sync(calling_queue, ^{
	        token_callback_block(type, text, token_range);
	    });
	    token_index += length;
	}

	// Returns how many units must be subtracted from the byte length of `text` to get
	// its length in UTF-16 code units (what NSRange / NSString count), and adds that
	// amount to the running total `multibytes_encountered`.
	//
	// Every UTF-8 continuation byte (10xxxxxx) is one byte too many, except that a
	// 4-byte sequence encodes a character outside the BMP, which occupies TWO UTF-16
	// units — so each 4-byte lead byte (11110xxx) gives one unit back.
	// NOTE(review): assumes tokens are not split in the middle of a character.
	int update_multibytes_encountered(char *text, unsigned long len) {
	    if (!MULTIBYTE_RANGE_ADJUSTING_ENABLED) return 0;
	    int count = 0;
	    for (unsigned long i = 0; i < len; i++) {
	        unsigned char byte = (unsigned char)*(text + i);
	        if ((byte & 0xC0) == 0x80) {
	            count++; // continuation byte
	        } else if ((byte & 0xF8) == 0xF0) {
	            count--; // lead byte of a 4-byte sequence (surrogate pair in UTF-16)
	        }
	    }
	    multibytes_encountered += count;
	    return count;
	}

	// Emits the pending text token, if any, and starts over with a fresh zeroed buffer.
	// Does nothing when no text has been accumulated.
	void concat_previous_text_token() {
	    if (last_text_token_len != 0) {
	        record_token(last_text_token, last_text_token_len, last_text_token_token_type);

	        // swap the used buffer for a new zero-filled one of the initial size
	        free(last_text_token);
	        last_text_token = calloc(T_TEXT_BUFFER_SIZE_INCREMENT, 1);
	        last_text_token_buffer_size = T_TEXT_BUFFER_SIZE_INCREMENT;

	        // back to an empty plain-text token
	        last_text_token_len = 0;
	        last_text_token_token_type = t_text;
	    }
	}

	// Tokenizes the NUL-terminated UTF-8 string `data`, invoking `block` once per token.
	// Scanning is serialized on a private queue because the scanner keeps its state in
	// globals; this function returns after the whole input has been lexed.
	//
	// NOTE(review): record_token() dispatch_syncs back onto the caller's queue while the
	// caller is blocked in the dispatch_sync below. That looks like it can only work
	// when the caller runs on a concurrent queue — confirm no caller uses the main queue
	// or another serial queue.
	void lex(const char *data, TokenLexedBlock block) {
	    // create global queue
	    static dispatch_once_t lexer_queue_creation;
	    dispatch_once(&lexer_queue_creation, ^{
	        lexer_queue = dispatch_queue_create("com.azuretalon.Macchiato.lexer_queue", NULL);
	    });

	    dispatch_queue_t current_calling_queue = dispatch_get_current_queue();
	    dispatch_sync(lexer_queue, ^{
	        // reset global state
	        token_callback_block = block;
	        token_index = 0;
	        multibytes_encountered = 0;

	        calling_queue = current_calling_queue; // capture from scope

	        last_text_token = calloc(T_TEXT_BUFFER_SIZE_INCREMENT, 1); // allocate a kilobyte, zero it out
	        last_text_token_len = 0;
	        last_text_token_token_type = t_text;
	        last_text_token_buffer_size = T_TEXT_BUFFER_SIZE_INCREMENT;

	        // The start condition survives between scans; without this reset, input that
	        // ended inside "(" or "[" would make the next run begin in in_paren / in_bracket.
	        BEGIN(INITIAL);

	        // lex
	        YY_BUFFER_STATE buf = yy_scan_string(data);
	        yylex();
	        concat_previous_text_token(); // grab any remaining text
	        yy_delete_buffer(buf);

	        // free
	        free(last_text_token);
	        last_text_token = NULL;
	        last_text_token_len = 0;
	        last_text_token_buffer_size = 0;
	    });
	}
	//
	// MarkdownGrammar.m
	// Macchiato
	//
	// Created by Kenneth Ballenegger on 8/2/12.
	// Copyright (c) 2012 Azure Talon. All rights reserved.
	//

	#import "MarkdownGrammar.h"

	#import "markdown.lexer.h"


	#define DEBUGGING_GRAMMAR 0



	// TODO: comment with example input and output


	// Groups the flat token list produced by the lexer into Markdown syntax elements.
	//
	// `tokens` is an array of dictionaries with the keys @"TokenType" (NSNumber),
	// @"Range" (NSValue wrapping an NSRange) and @"Text" (NSString).
	//
	// Returns an array of dictionaries, each with @"Range" and @"SyntaxElementType", plus,
	// depending on the element, @"MakupTokenRanges" (ranges of the markup characters
	// themselves), @"Name" and/or @"URL". Tokens that do not form a valid element produce
	// no entry. A token type of 0 is used throughout as "no token" (before the start or
	// past the end of the input).
	NSArray * MarkdownSyntaxElementsFromTokens(NSArray *tokens) {


	    NSMutableArray *syntaxElements = [NSMutableArray array];

	    NSUInteger count = [tokens count];

	    // Declares <var>_token and <var>_type for the token at `index`; nil / 0 when out of bounds.
	#define PREPARE_TOKEN(index, var) \
	    NSDictionary *var##_token; enum TokenType var##_type; \
	    if ((index) >= 0 && (index) < count) { \
	        var##_token = [tokens objectAtIndex:(index)]; \
	        var##_type = [var##_token[@"TokenType"] intValue]; \
	    } else { \
	        var##_token = nil; \
	        var##_type = 0; \
	    } // END OF MACRO

	    // Declares <var>_range from <var>_token; zero range (never garbage) when there is no token.
	#define PREPARE_RANGE_FROM_TOKEN(var) \
	    NSRange var##_range = NSMakeRange(0, 0); \
	    if (var##_token) \
	        var##_range = [var##_token[@"Range"] rangeValue]; // END OF MACRO

	    // Declares `toRange`: the range of token `var`, or of the token before it at end of input.
	#define PREPARE_TO_RANGE_FROM_TOKEN_OR_PREVIOUS(var) \
	    NSRange toRange; \
	    if (var##_type) { \
	        toRange = [var##_token[@"Range"] rangeValue]; \
	    } else { \
	        PREPARE_TOKEN(var-1, toRange_##var##_m1); \
	        toRange = [toRange_##var##_m1_token[@"Range"] rangeValue]; \
	    } // END OF MACRO

	    // Range from the start of `first` to the end of `second`.
	#define BRIDGE_RANGES(first, second) \
	    NSMakeRange(first.location, \
	                second.location + second.length - first.location) // END OF MACRO

	    // Resumes the main loop after token j (the loop's i++ still runs).
	#define CONTINUE_AT(j) \
	    i = j; \
	    goto next_syntax_element; // END OF MACRO

	#define COMMIT_ELEMENT(range, type, extra) \
	    { /* guard scope for var declaration */ \
	        NSMutableDictionary *metaDict = \
	            [NSMutableDictionary dictionaryWithDictionary: \
	                @{@"Range": [NSValue valueWithRange:range], \
	                  @"SyntaxElementType": [NSNumber numberWithInt:type]}]; \
	        if (extra) [metaDict addEntriesFromDictionary:extra]; \
	        [syntaxElements addObject:metaDict]; \
	        if (DEBUGGING_GRAMMAR) NSLog(@"found token type %d", type); \
	    } // END OF MACRO


	    // Inline pair (e.g. *text*): the same token type must reappear on the same line.
	    // NOTE(review): a closing marker that is the very last token (nothing after it)
	    // is not accepted here, unlike the block-level cases that treat type 0 as a valid end.
	#define CASE_SIMPLE_PAIR_FOR_TYPE(t_type, s_type) \
	    case t_type: { \
	        for (int j = i+1; j < count; j++) { \
	            PREPARE_TOKEN(j, j); \
	            if (j_type == t_type) { \
	                /* next must be space, newline or punctuation */ \
	                PREPARE_TOKEN(j+1, j_p1); \
	                if (!(j_p1_type == t_space || \
	                      j_p1_type == t_newline || \
	                      j_p1_type == t_punctuation)) continue; \
	                PREPARE_RANGE_FROM_TOKEN(j); \
	                NSRange commitRange = BRIDGE_RANGES(i_range, j_range); \
	                NSDictionary *metaDictionary = \
	                    @{@"MakupTokenRanges": @[ \
	                        [NSValue valueWithRange:i_range], \
	                        [NSValue valueWithRange:j_range]]}; \
	                COMMIT_ELEMENT(commitRange, s_type, metaDictionary); \
	                CONTINUE_AT(j); \
	            } else if (j_type == t_newline) { \
	                /* invalid, fallback to text */ \
	                break; \
	            } \
	        } \
	    } break; // END OF MACRO

	    // Leaves the enclosing switch unless the two preceding tokens are newlines (or missing).
	#define CHECK_ENTERS_AS_PARAGRAPH \
	    PREPARE_TOKEN(i-1, i_m1); \
	    PREPARE_TOKEN(i-2, i_m2); \
	    if (!((i_m1_type == t_newline || i_m1_type == 0) && \
	          (i_m2_type == t_newline || i_m2_type == 0))) break; // END OF MACRO


	#pragma mark -
	    // LOOP STARTS HERE

	    // for list items, we need to keep track of the last token that was part of a list item
	    int lastCommittedListItemToken = -1;


	    for (int i = 0; i < count; i++) {
	        PREPARE_TOKEN(i, i);
	        PREPARE_RANGE_FROM_TOKEN(i);

	        if (DEBUGGING_GRAMMAR) NSLog(@"big loop iteration %d", i);


	        // figure out which opening token it is

	        switch (i_type) {
	#pragma mark inline code
	            CASE_SIMPLE_PAIR_FOR_TYPE(t_backtick, s_code);
	#pragma mark emphasis
	            CASE_SIMPLE_PAIR_FOR_TYPE(t_emphasis, s_emphasis);
	#pragma mark strong
	            CASE_SIMPLE_PAIR_FOR_TYPE(t_strong, s_strong);
	#pragma mark strong emphasis
	            CASE_SIMPLE_PAIR_FOR_TYPE(t_strong_emphasis, s_strong_emphasis);

	#pragma mark code block
	            // code block
	            case t_indent: {
	                // a code block must be:
	                // - preceded by two newlines (or nothingness)
	                // - followed by two newline (or nothingness)

	                CHECK_ENTERS_AS_PARAGRAPH;

	                for (int j = i+1; j < count+1; j++) {
	                    PREPARE_TOKEN(j, j);

	                    // looking for the newline, still part of code block
	                    if (!(j_type == t_newline || j_type == 0)) continue;

	                    // look ahead to check if next token is also newline
	                    PREPARE_TOKEN(j+1, j_p1);
	                    if (j_p1_type == t_newline || j_p1_type == 0) {
	                        PREPARE_TO_RANGE_FROM_TOKEN_OR_PREVIOUS(j);
	                        NSRange commitRange = BRIDGE_RANGES(i_range, toRange);
	                        COMMIT_ELEMENT(commitRange, s_code_block, nil);
	                        CONTINUE_AT(j);
	                    } else if (j_p1_type != t_indent) {
	                        break; // a newline followed by something that isn't an indent means invalid code block
	                    }
	                }
	            } break;

	#pragma mark blockquote
	            case t_quotemark: {
	                // a quotation block must be:
	                // - preceded by two newlines (or nothingness)
	                // - followed by two newline (or nothingness)

	                CHECK_ENTERS_AS_PARAGRAPH;

	                NSMutableArray *markupTokenRanges = [NSMutableArray arrayWithObject:i_token[@"Range"]];

	                // we go to count+1 so we can catch a blockquote at the very end of the string
	                for (int j = i+1; j < count+1; j++) {
	                    PREPARE_TOKEN(j, j);

	                    if (j_type == t_quotemark) [markupTokenRanges addObject:j_token[@"Range"]];

	                    // looking for the newline, still part of blockquote
	                    if (!(j_type == t_newline || j_type == 0)) continue;

	                    // look ahead to check if next token is also newline
	                    PREPARE_TOKEN(j+1, j_p1);
	                    if (j_p1_type == t_newline || j_p1_type == 0) {
	                        PREPARE_TO_RANGE_FROM_TOKEN_OR_PREVIOUS(j);
	                        NSRange commitRange = BRIDGE_RANGES(i_range, toRange);
	                        COMMIT_ELEMENT(commitRange, s_blockquote, @{@"MakupTokenRanges": markupTokenRanges});
	                        CONTINUE_AT(j);
	                    } else if (j_p1_type != t_quotemark) {
	                        break; // a newline followed by something that isn't a quotemark means invalid blockquote
	                    }
	                }
	            } break;

	#pragma mark headlines
	            case t_headline_mark: {
	                // a regular headline must be:
	                // - preceded by two newlines (or nothingness)
	                // - followed by two newline (or nothingness)

	                NSMutableArray *markupTokenRanges = [NSMutableArray arrayWithObject:i_token[@"Range"]];

	                CHECK_ENTERS_AS_PARAGRAPH;

	                // we go to count+1 so we can catch a headline at the very end of the string
	                for (int j = i+1; j < count+1; j++) {
	                    PREPARE_TOKEN(j, j);

	                    // looking for the newline that ends the headline line
	                    if (!(j_type == t_newline || j_type == 0)) continue;

	                    // look ahead to check if next token is also newline
	                    PREPARE_TOKEN(j+1, j_p1);
	                    if ((j_p1_type == t_newline || j_p1_type == 0)) {

	                        // optional closing marks ("## title ##") are markup, too
	                        PREPARE_TOKEN(j-1, j_m1);
	                        if (j_m1_type == t_headline_mark)
	                            [markupTokenRanges addObject:j_m1_token[@"Range"]];

	                        PREPARE_TO_RANGE_FROM_TOKEN_OR_PREVIOUS(j);
	                        NSRange commitRange = BRIDGE_RANGES(i_range, toRange);
	                        // headline level = number of characters in the opening mark, clamped to 1...6
	                        enum SyntaxElementType commitType = (enum SyntaxElementType)((int)s_headline_1 + (MAX(1, MIN(6, [i_token[@"Text"] length])) - 1));

	                        COMMIT_ELEMENT(commitRange, commitType, @{@"MakupTokenRanges": markupTokenRanges});
	                        CONTINUE_AT(j);
	                    } else {
	                        break; // this means a multi-line headline, which is not allowed
	                    }
	                }
	            } break;

	#pragma mark github code block
	            case t_triple_backtick: {
	                // a github-style code block is a paragraph

	                CHECK_ENTERS_AS_PARAGRAPH;

	                for (int j = i+1; j < count; j++) {
	                    PREPARE_TOKEN(j, j);

	                    // iterate until we find the matching triple backtick
	                    if (j_type != t_triple_backtick) continue;

	                    // look ahead to check if next two tokens are newlines
	                    PREPARE_TOKEN(j+1, j_p1);
	                    PREPARE_TOKEN(j+2, j_p2);
	                    if ((j_p1_type == t_newline || j_p1_type == 0) &&
	                        (j_p2_type == t_newline || j_p2_type == 0)) {

	                        int j_p1 = j+1;
	                        PREPARE_TO_RANGE_FROM_TOKEN_OR_PREVIOUS(j_p1);
	                        NSRange commitRange = BRIDGE_RANGES(i_range, toRange);

	                        NSDictionary *metaDictionary = @{@"MakupTokenRanges": @[i_token[@"Range"], j_token[@"Range"]]};

	                        COMMIT_ELEMENT(commitRange, s_code_block, metaDictionary);
	                        CONTINUE_AT(j+1);
	                    }
	                }
	            } break;

	#pragma mark underlined headline
	            case t_headline_underline: case t_headline_double_underline: {

	                // previous token MUST be newline
	                PREPARE_TOKEN(i-1, i_m1);
	                if (i_m1_type != t_newline) break;

	                // if no preceding newline found, assume zero -- this deals with headline at start of file
	                int startIndex = 0;

	                // loop backwards to find the actual headline
	                for (int j = i-2; j >= 0; j--) {
	                    PREPARE_TOKEN(j, j);

	                    // i'm looking for a preceding newline to find beginning of previous line
	                    if (j_type == t_newline) {
	                        startIndex = j+1;
	                        break;
	                    }
	                }

	                // verify that it starts as paragraph, or on bounds
	                PREPARE_TOKEN(startIndex-1, s_m1);
	                if (!(s_m1_type == t_newline || s_m1_type == 0)) break;

	                PREPARE_TOKEN(startIndex-2, s_m2);
	                if (!(s_m2_type == t_newline || s_m2_type == 0)) break;

	                // verify that it's followed by paragraph end
	                PREPARE_TOKEN(i+1, i_p1);
	                if (!(i_p1_type == t_newline || i_p1_type == 0)) break;

	                PREPARE_TOKEN(i+2, i_p2);
	                if (!(i_p2_type == t_newline || i_p2_type == 0)) break;

	                // if we got to here without breaking, we're a valid match
	                PREPARE_TOKEN(startIndex, start);
	                PREPARE_RANGE_FROM_TOKEN(i_p1);
	                NSRange start_range = [start_token[@"Range"] rangeValue];
	                // at the very end of the input there is no newline token after the
	                // underline; end the element on the underline itself in that case
	                NSRange end_range = (i_p1_token ? i_p1_range : i_range);
	                NSRange commitRange = BRIDGE_RANGES(start_range, end_range);

	                enum SyntaxElementType commitType;
	                if (i_type == t_headline_double_underline)
	                    commitType = s_headline_1;
	                else
	                    commitType = s_headline_2;
	                COMMIT_ELEMENT(commitRange, commitType, @{@"MakupTokenRanges": @[i_token[@"Range"]]});
	                CONTINUE_AT(i+1); // eat newline

	            } break;

	#pragma mark rule
	            case t_rule: {
	                CHECK_ENTERS_AS_PARAGRAPH;
	                PREPARE_TOKEN(i+1, i_p1);
	                PREPARE_TOKEN(i+2, i_p2);
	                if ((i_p1_type == t_newline || i_p1_type == 0) &&
	                    (i_p2_type == t_newline || i_p2_type == 0)) {
	                    COMMIT_ELEMENT(i_range, s_rule, nil);
	                    CONTINUE_AT(i+1);
	                }
	            } break;

	#pragma mark links & images
	            case t_open_bracket: {

	                // seek end of link text
	                int linkTextEnd = -1;
	                for (int j = i+1; j < count; j++) {
	                    PREPARE_TOKEN(j, j);
	                    if (j_type == t_close_bracket) {
	                        // must be followed by [, (, or :, else invalid
	                        PREPARE_TOKEN(j+1, j_p1);
	                        if (!(j_p1_type == t_open_bracket ||
	                              j_p1_type == t_open_paren ||
	                              j_p1_type == t_colon)) break;

	                        // found!
	                        linkTextEnd = j; break;
	                    } else if (j_type == t_newline) {
	                        // invalid, fallback to text
	                        break;
	                    }
	                }
	                if (linkTextEnd < 0) break; // end not found, this is not a link

	                // now that we have end of link text, find rest of link
	                PREPARE_TOKEN(linkTextEnd+1, lte_p1); // prepare "link target open" token, [ or (
	                // start iterating after the lto

	                if (lte_p1_type == t_colon) {
	                    // we've got a link definition
	                    // now we've gotta check how the grammar element started, definitions need to be one per line
	                    PREPARE_TOKEN(i-1, i_m1);
	                    // also, t_colon needs to be followed by a space
	                    PREPARE_TOKEN(linkTextEnd+2, lte_p2);

	                    if (!(i_m1_type == t_newline || i_m1_type == 0) ||
	                        lte_p2_type != t_space) {
	                        // fail
	                        CONTINUE_AT(i+1);
	                    }

	                    for (int j = linkTextEnd+3; j < count; j++) {
	                        PREPARE_TOKEN(j, j);

	                        // looking for newline or end of file
	                        if (j_type == t_newline || j_type == 0) {

	                            // we're good!
	                            PREPARE_TO_RANGE_FROM_TOKEN_OR_PREVIOUS(j);
	                            NSRange commitRange = BRIDGE_RANGES(i_range, toRange);

	                            // the name is the text of every token between the brackets
	                            NSMutableString *name = [[NSMutableString alloc] init];
	                            for (int k = i+1; k <= linkTextEnd-1; k++) {
	                                PREPARE_TOKEN(k, k);
	                                [name appendString:k_token[@"Text"]];
	                            }


	                            // the url is the text of every token after ": " up to the newline
	                            NSMutableString *url = [[NSMutableString alloc] init];
	                            for (int k = linkTextEnd+3; k <= j-1; k++) {
	                                PREPARE_TOKEN(k, k);
	                                [url appendString:k_token[@"Text"]];
	                            }

	                            NSDictionary *metaDictionary = @{
	                                @"Name": [NSString stringWithString:name],
	                                @"URL": [NSString stringWithString:url]
	                            };

	                            COMMIT_ELEMENT(commitRange, s_link_info, metaDictionary);
	                            CONTINUE_AT(j);
	                        }
	                    }

	                } else if (lte_p1_type == t_open_bracket || lte_p1_type == t_open_paren) {
	                    // we've got an inline link

	                    for (int j = linkTextEnd+2; j < count; j++) {
	                        PREPARE_TOKEN(j, j);
	                        if ((lte_p1_type == t_open_bracket && j_type == t_close_bracket) ||
	                            (lte_p1_type == t_open_paren && j_type == t_close_paren)) {
	                            // find closing ] or )

	                            // find what link points to
	                            NSString *nameOrURLKey = (lte_p1_type == t_open_bracket ? @"Name" : @"URL");

	                            // actually, it may be an image if it's preceded by a bang!
	                            PREPARE_TOKEN(i-1, i_m1);
	                            BOOL isImage = (i_m1_type == t_bang && lte_p1_type == t_open_paren);
	                            enum SyntaxElementType commitType = (isImage ? s_image : s_link);
	                            PREPARE_RANGE_FROM_TOKEN(i_m1);
	                            NSRange fromRange = (isImage ? i_m1_range : i_range);

	                            // we're good!
	                            PREPARE_RANGE_FROM_TOKEN(j);
	                            NSRange commitRange = BRIDGE_RANGES(fromRange, j_range);

	                            // "[text][]" is an empty reference: the link text doubles as the name
	                            NSMutableString *linkValue = [[NSMutableString alloc] init];
	                            BOOL isEmptyLink = (linkTextEnd+2 == j && lte_p1_type == t_open_bracket && j_type == t_close_bracket);
	                            int linkValueFrom = isEmptyLink ? i+1 : linkTextEnd+2;
	                            int linkValueTo = isEmptyLink ? linkTextEnd-1 : j-1;
	                            for (int k = linkValueFrom; k <= linkValueTo; k++) {
	                                PREPARE_TOKEN(k, k);
	                                [linkValue appendString:k_token[@"Text"]];
	                            }

	                            COMMIT_ELEMENT(commitRange, commitType, @{nameOrURLKey: [NSString stringWithString:linkValue]});
	                            CONTINUE_AT(j);
	                        } else if (j_type == t_newline) {
	                            // invalid, fallback to text
	                            break;
	                        }
	                    }
	                }

	            } break;

	#pragma mark list items
	            case t_bullet: {

	                // enforce paragraph entrance, unless we're already inside a list
	                if (i-1 != lastCommittedListItemToken) {
	                    CHECK_ENTERS_AS_PARAGRAPH;
	                }

	                for (int j = i+1; j < count+1; j++) {

	                    PREPARE_TOKEN(j, j);
	                    // looking for newline or EOF
	                    if (!(j_type == t_newline || j_type == 0)) continue;

	                    PREPARE_TOKEN(j+1, j_p1);
	                    // next token can be a paragraph end or another bullet
	                    // Note: we don't know whether next bullet is valid at this point
	                    if (j_p1_type == t_newline || j_p1_type == t_bullet || j_p1_type == 0) {
	                        PREPARE_TO_RANGE_FROM_TOKEN_OR_PREVIOUS(j);
	                        NSRange commitRange = BRIDGE_RANGES(i_range, toRange);
	                        COMMIT_ELEMENT(commitRange, s_list_item, @{@"MakupTokenRanges": @[i_token[@"Range"]]});
	                        lastCommittedListItemToken = j;
	                        CONTINUE_AT(i+1); // we continue inside the list item, so we can parse inline styles, too
	                    } else if (j_p1_type == t_indent) {
	                        continue; // next line being an indent means it's part of the list item
	                    } else if (j_p1_type == t_space) {
	                        // if it's a space, we'll also allow a combination of X space, where X is the length of the list item bullet
	                        BOOL valid = YES;
	                        int k = j+2;
	                        for (; k <= j+i_range.length; k++) {
	                            PREPARE_TOKEN(k, k);
	                            if (k_type == t_space) continue;
	                            // else invalid
	                            valid = NO;
	                            break;
	                        }
	                        if (valid) {
	                            j = k;
	                            continue; // valid
	                        } else {
	                            break; // invalid
	                        }
	                    } else {
	                        break; // invalid, discard entire item and move on
	                    }
	                }

	            } break;

	#pragma mark - default
	            case t_text: default: {
	                // do nothing
	            } break;
	        }

	    next_syntax_element: continue;
	    }


	    return syntaxElements;
	}
	//
	// MarkdownSyntaxHighlighter.m
	// Macchiato
	//
	// Created by Kenneth Ballenegger on 7/24/12.
	// Copyright (c) 2012 Azure Talon. All rights reserved.
	//

	#import "MarkdownSyntaxHighlighter.h"
	#import "MacchiatoThemeController.h"

	#import "markdown.lexer.h"
	#import "MarkdownGrammar.h"


	// Attribute-name string constants, private to this file.
	// NOTE(review): neither constant is referenced in the visible part of this file;
	// presumably intended as text-attribute keys for token / syntax element types — confirm they are still needed.
	static NSString *const TokenTypeAttributeName = @"TokenTypeAttributeName";
	static NSString *const SyntaxElementTypeAttributeName = @"SyntaxElementTypeAttributeName";


	// Class extension: private state of the highlighter.
	// NOTE(review): NSLayoutManagerDelegate conformance is declared here, but no delegate
	// methods are implemented in the visible @implementation — confirm it is still required.
	@interface MarkdownSyntaxHighlighter () <NSLayoutManagerDelegate> {
	@private
	// The text storage whose attributes this highlighter rewrites; assigned once in -initWithTextStorage:.
	NSTextStorage *textStorage;
	}

	@end


	@implementation MarkdownSyntaxHighlighter

	#pragma mark - Lifecycle

	// Creates a highlighter operating on `storage`. The storage is kept in the
	// `textStorage` ivar; every attribute change made by this class goes to it.
	- (id)initWithTextStorage:(NSTextStorage *)storage {
	    self = [super init];
	    if (self) {
	        self->textStorage = storage;
	    }
	    return self;
	}

	// Unsupported initializer: logs a hint and aborts the process.
	// Use -initWithTextStorage: instead.
	- (id)init {
	    NSLog(@"You should only be using initWithTextStorage:.");
	    abort();
	}

	#pragma mark - Range helpers

	// Converts a range relative to `range` into a text-storage range and clips it to the
	// storage's current bounds. The result has length 0 when nothing is left to style;
	// callers must skip such ranges. Clipping matters because the ranges were computed
	// from a snapshot of the text that may be stale by the time attributes are applied.
	- (NSRange)absoluteRangeForRelativeRange:(NSRange)relativeRange inRange:(NSRange)range {
	    NSRange absoluteRange = relativeRange;
	    absoluteRange.location += range.location;
	    return NSIntersectionRange(absoluteRange, NSMakeRange(0, self->textStorage.length));
	}

	// Debug aid: paints a translucent background whose hue is derived from `typeValue`
	// (a token type or syntax element type) over `relativeRange`.
	- (void)addDebugBackgroundForTypeValue:(int)typeValue relativeRange:(NSRange)relativeRange inRange:(NSRange)range {
	    NSRange absoluteRange = [self absoluteRangeForRelativeRange:relativeRange inRange:range];
	    if (absoluteRange.length == 0)
	        return;

	    float hue = (typeValue - 10) / 20.0f;
	    NSColor *color = [NSColor colorWithCalibratedHue:hue saturation:1 brightness:1 alpha:.7];
	    [self->textStorage addAttribute:NSBackgroundColorAttributeName value:color range:absoluteRange];
	}

	#pragma mark - Highlighting

	// Lexes the text in `range`, groups the tokens into syntax elements and applies the
	// matching attributes to the text storage.
	//
	// The lexing and grammar pass run on the calling thread; the attribute changes are
	// performed on the main thread (synchronously).
	// NOTE(review): the text storage's string is read on the calling thread — confirm
	// callers guarantee the storage is not edited concurrently.
	- (void)highlightRange:(NSRange)range {

	    // Clip to the current bounds so a stale range cannot raise NSRangeException.
	    // An empty result also covers the original "empty document" early-out.
	    range = NSIntersectionRange(range, NSMakeRange(0, self->textStorage.length));
	    if (range.length == 0)
	        return; // nothing to highlight

	    NSString *substringToLex = [self->textStorage.string substringWithRange:range];

	    // NULL is returned when the substring cannot be encoded (e.g. the range cuts a
	    // surrogate pair in half); the lexer cannot scan a NULL string.
	    const char *utf8ToLex = [substringToLex cStringUsingEncoding:NSUTF8StringEncoding];
	    if (utf8ToLex == NULL)
	        return;

	    NSMutableArray *tokens = [NSMutableArray array];

	    lex(utf8ToLex, ^(enum TokenType ttype, const char *ttext, NSRange trange) {

	        // stringWithUTF8String: returns nil for NULL / malformed input, and a nil value
	        // inside a dictionary literal throws — fall back to an empty string.
	        NSString *text = (ttext != NULL ? [NSString stringWithUTF8String:ttext] : nil);
	        if (text == nil)
	            text = @"";

	        [tokens addObject:@{@"TokenType": [NSNumber numberWithInt:ttype], @"Range": [NSValue valueWithRange:trange], @"Text": text}];

	    });

	    NSArray *syntaxElements = MarkdownSyntaxElementsFromTokens(tokens);

	    // this should be done on main thread
	    dispatch_block_t applyHighlighting = ^{

	        // reset attributes globally by styling the whole range as a default element
	        [self highlightSyntaxElement:@{@"Range": [NSValue valueWithRange:NSMakeRange(0, range.length)], @"SyntaxElementType": [NSNumber numberWithInt:s_default]} inRange:range];

	        // set attributes for syntax element tokens
	        for (NSDictionary *syntaxElement in syntaxElements) {
	            [self highlightSyntaxElement:syntaxElement inRange:range];
	        }

	        // debug mode: 1 colors individual tokens, 2 colors syntax elements
	        NSInteger debugMode = [[NSUserDefaults standardUserDefaults] integerForKey:@"MacchiatoDebugMode"];
	        if (debugMode == 1) {
	            for (NSDictionary *token in tokens) {
	                [self addDebugBackgroundForTypeValue:[token[@"TokenType"] intValue]
	                                       relativeRange:[token[@"Range"] rangeValue]
	                                             inRange:range];
	            }
	        } else if (debugMode == 2) {
	            [self addDebugBackgroundForTypeValue:s_default
	                                   relativeRange:NSMakeRange(0, range.length)
	                                         inRange:range];
	            for (NSDictionary *syntaxElement in syntaxElements) {
	                [self addDebugBackgroundForTypeValue:[syntaxElement[@"SyntaxElementType"] intValue]
	                                       relativeRange:[syntaxElement[@"Range"] rangeValue]
	                                             inRange:range];
	            }
	        }
	    };

	    // dispatch_sync onto the main queue from the main thread never returns, so run inline there.
	    if ([NSThread isMainThread]) {
	        applyHighlighting();
	    } else {
	        dispatch_sync(dispatch_get_main_queue(), applyHighlighting);
	    }
	}

	// Applies the attributes for one syntax element.
	//
	// `syntaxElement` keys read here: @"SyntaxElementType" (NSNumber), @"Range" (NSValue,
	// relative to `range`), and optionally @"URL", @"Name" and @"MakupTokenRanges"
	// (array of NSValue ranges that are drawn in the subdued color).
	// The element's range first has ALL of its attributes replaced, so elements applied
	// later override earlier ones covering the same text.
	- (void)highlightSyntaxElement:(NSDictionary *)syntaxElement inRange:(NSRange)range {

	    enum SyntaxElementType stype = ((NSNumber *)syntaxElement[@"SyntaxElementType"]).intValue;

	    MacchiatoThemeController *themeController = [MacchiatoThemeController sharedController];
	    NSFontManager *fontManager = [NSFontManager sharedFontManager];

	    // use defaults
	    NSMutableDictionary *attrs = [NSMutableDictionary dictionary];
	    attrs[NSFontAttributeName] = [themeController currentMainFont];
	    attrs[NSForegroundColorAttributeName] = [themeController currentTextForegroundColor];

	    // headline level N is rendered bold at (20 - N) points
	#define CASE_HEADLINE_TYPE(index) case s_headline_##index: { attrs[NSFontAttributeName] = [fontManager convertFont:[fontManager convertFont:[themeController currentMainFont] toHaveTrait:NSFontBoldTrait] toSize:20.0f - index]; } break;

	    switch (stype) {
	        case s_emphasis: {
	            attrs[NSFontAttributeName] = [fontManager convertFont:[themeController currentMainFont] toHaveTrait:NSFontItalicTrait];
	        } break;
	        case s_strong: {
	            attrs[NSFontAttributeName] = [fontManager convertFont:[themeController currentMainFont] toHaveTrait:NSFontBoldTrait];
	        } break;
	        case s_strong_emphasis: {
	            NSFont *italicFont = [fontManager convertFont:[themeController currentMainFont] toHaveTrait:NSFontItalicTrait];
	            attrs[NSFontAttributeName] = [fontManager convertFont:italicFont toHaveTrait:NSFontBoldTrait];
	        } break;
	        case s_code:
	        case s_code_block: {
	            attrs[NSFontAttributeName] = [themeController currentMonospacedFont];
	        } break;
	        case s_blockquote: {
	            attrs[NSFontAttributeName] = [themeController currentQuoteFont];
	        } break;
	        case s_link:
	        case s_image:
	        case s_link_info: {
	            attrs[NSUnderlineStyleAttributeName] = [NSNumber numberWithInt:NSSingleUnderlineStyle];
	            attrs[NSUnderlineColorAttributeName] = [themeController currentUnderlineColor];
	            attrs[NSCursorAttributeName] = [NSCursor pointingHandCursor];

	            id url = syntaxElement[@"URL"];
	            id name = syntaxElement[@"Name"];
	            if (stype == s_link) {
	                if (url) {
	                    attrs[@"MarkdownClickAction"] = @(MarkdownClickActionOpenURL);
	                    attrs[@"URL"] = url;
	                } else if (name) {
	                    attrs[@"MarkdownClickAction"] = @(MarkdownClickActionOpenNamedLink);
	                    attrs[@"Name"] = name;
	                }
	            } else {
	                // NOTE(review): s_link_info also receives the "render image" click action
	                // here, exactly as before — confirm that is intended for link definitions.
	                attrs[@"MarkdownClickAction"] = @(MarkdownClickActionRenderImage);
	                if (url) {
	                    attrs[@"URL"] = url; // never store nil into the attribute dictionary
	                }
	            }
	        } break;
	        case s_rule: {
	            NSMutableParagraphStyle *paragraphStyle = [[themeController currentParagraphStyle] mutableCopy];
	            [paragraphStyle setAlignment:NSCenterTextAlignment];
	            attrs[NSParagraphStyleAttributeName] = paragraphStyle;
	        } break;
	        CASE_HEADLINE_TYPE(1);
	        CASE_HEADLINE_TYPE(2);
	        CASE_HEADLINE_TYPE(3);
	        CASE_HEADLINE_TYPE(4);
	        CASE_HEADLINE_TYPE(5);
	        CASE_HEADLINE_TYPE(6);
	        case s_default: default: {
	            // do nothing
	        } break;
	    }

	#undef CASE_HEADLINE_TYPE

	    // callback on named link info
	    if (stype == s_link_info && self.namedLinkCallback) {
	        self.namedLinkCallback(syntaxElement[@"Name"], syntaxElement[@"URL"]);
	    }

	    NSRange absoluteRange = [self absoluteRangeForRelativeRange:[syntaxElement[@"Range"] rangeValue] inRange:range];
	    if (absoluteRange.length == 0)
	        return; // nothing (left) in bounds to style

	    attrs[@"SyntexElementRange"] = [NSValue valueWithRange:absoluteRange];

	    [self->textStorage setAttributes:attrs range:absoluteRange];

	    // markup characters themselves (e.g. the asterisks or pound signs) are drawn subdued
	    for (NSValue *markupStatementRangeValue in syntaxElement[@"MakupTokenRanges"]) {
	        NSRange markupStatementRange = [self absoluteRangeForRelativeRange:markupStatementRangeValue.rangeValue inRange:range];
	        if (markupStatementRange.length == 0)
	            continue;

	        [self->textStorage addAttribute:NSForegroundColorAttributeName value:[themeController currentTextSubduedColor] range:markupStatementRange];
	    }
	}



	@end