/*
 * Skip to content
 *
 * Instantly share code, notes, and snippets.
 *
 * @sleexyz
 * Created May 25, 2023 22:59
 * Show Gist options
 *   - Save sleexyz/755a26d5d6d7342cdd0a5e55cc22633f to your computer and use it in GitHub Desktop.
 */
#include "tree_sitter/parser.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>
// External token kinds this scanner can report. The values are used both as
// indices into the `valid_symbols` array passed to the scan function and as
// the value stored in `lexer->result_symbol`.
// NOTE(review): the numbering presumably has to match the order of the
// `externals` list in the grammar definition -- confirm before reordering.
enum TokenType {
  INDENT = 0,  // the new line is indented deeper than the innermost open level
  DEDENT = 1,  // the new line is indented less than the innermost open level
  NEWLINE = 2, // a line ended without changing the indentation level
};
// Number of slots in the indentation stack.
enum { INDENT_STACK_CAPACITY = 1024 };

// Persistent scanner state: a stack of indentation widths, one entry per
// currently open indentation level, innermost level last.
typedef struct {
  // Indentation widths of the open levels. Slot 0 is the base level and is
  // expected to hold 0.
  uint16_t indent_length_stack[INDENT_STACK_CAPACITY];
  // Number of slots of `indent_length_stack` currently in use.
  size_t indent_length_stack_size;
} Scanner;
// Allocates the scanner state.
//
// The state is zero-filled by calloc, so the indentation stack starts out
// empty (size 0) with every slot set to 0. Returns the new state as an opaque
// pointer, or NULL if the allocation fails. The caller releases it with
// tree_sitter_puddlejumper_external_scanner_destroy.
void *tree_sitter_puddlejumper_external_scanner_create(void) {
  // `(void)` makes this a real prototype; `sizeof *scanner` keeps the
  // allocation size tied to the variable's type.
  Scanner *scanner = calloc(1, sizeof *scanner);
  return scanner;
}
// Releases the scanner state previously returned by the create function.
// `payload` may be NULL, in which case nothing happens (free(NULL) is a no-op).
void tree_sitter_puddlejumper_external_scanner_destroy(void *payload) {
  // The state owns no nested allocations, so a single free is sufficient.
  free(payload);
}
// Writes the indentation stack into `buffer` and returns the number of bytes
// written.
//
// Slot 0 of the stack is skipped because it is always 0. Every other slot is
// stored as one `char`, so each 16-bit stack entry is narrowed to a single
// char on the way out. Writing stops once the stack is exhausted or
// TREE_SITTER_SERIALIZATION_BUFFER_SIZE bytes have been produced, whichever
// comes first.
unsigned tree_sitter_puddlejumper_external_scanner_serialize(void *payload,
                                                             char *buffer) {
  const Scanner *state = (const Scanner *)payload;
  size_t written = 0;

  for (size_t level = 1; level < state->indent_length_stack_size; ++level) {
    if (written >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE) {
      break;
    }
    buffer[written] = (char)state->indent_length_stack[level];
    written++;
  }

  return written;
}
// Rebuilds the indentation stack from `length` bytes of `buffer`, as produced
// by the serialize function.
//
// The stack is reset to the single base level (indent 0) and one level is
// pushed per byte. With `length == 0` the result is just the base level and
// `buffer` is not read.
//
// Each byte is read as an unsigned value: `buffer` is plain `char`, which is
// signed on many platforms, and without the cast a stored indent of 128..255
// would sign-extend to 65408..65535 when widened to uint16_t.
// Bytes that would not fit in the stack are ignored rather than written past
// the end of the array.
void tree_sitter_puddlejumper_external_scanner_deserialize(void *payload,
                                                           const char *buffer,
                                                           unsigned length) {
  Scanner *scanner = (Scanner *)payload;
  const size_t capacity = sizeof scanner->indent_length_stack /
                          sizeof scanner->indent_length_stack[0];

  // Slot 0 is the base level; it is never serialized and always holds 0.
  scanner->indent_length_stack[0] = 0;
  scanner->indent_length_stack_size = 1;

  for (unsigned i = 0;
       i < length && scanner->indent_length_stack_size < capacity; i++) {
    scanner->indent_length_stack[scanner->indent_length_stack_size] =
        (unsigned char)buffer[i];
    scanner->indent_length_stack_size++;
  }
}
// Tries to recognise one layout token (INDENT, DEDENT or NEWLINE) at the
// current lexer position.
//
// payload       - the Scanner state holding the indentation stack.
// lexer         - tree-sitter lexer used to peek at and advance over input.
// valid_symbols - indexed by TokenType; true for each token kind the parser
//                 can accept here.
//
// Returns true with `lexer->result_symbol` set when a token is produced, and
// false otherwise.
//
// The token end is marked before any input is consumed and is never moved
// again, so the whitespace skipped below is only looked at to measure the
// indentation of the upcoming line.
bool tree_sitter_puddlejumper_external_scanner_scan(void *payload,
                                                    TSLexer *lexer,
                                                    const bool *valid_symbols) {
  Scanner *scanner = (Scanner *)payload;
  const size_t stack_capacity = sizeof scanner->indent_length_stack /
                                sizeof scanner->indent_length_stack[0];

  lexer->mark_end(lexer);

  // Walk over whitespace, tracking the indentation width of the current line.
  bool found_end_of_line = false;
  uint32_t indent_length = 0;
  for (;;) {
    const int32_t c = lexer->lookahead;
    if (c == '\n') {
      // A new line starts: its indentation is counted from scratch.
      found_end_of_line = true;
      indent_length = 0;
    } else if (c == ' ') {
      indent_length++;
    } else if (c == '\t') {
      // A tab counts as 8 columns.
      indent_length += 8;
    } else if (c == '\r' || c == '\f') {
      // Carriage return and form feed reset the width but do not, on their
      // own, count as the end of a line.
      indent_length = 0;
    } else {
      if (c == 0) {
        // A lookahead of 0 is treated as end of input: behave as if a final,
        // unindented line followed.
        // NOTE(review): a literal NUL character in the input is
        // indistinguishable from end of input here -- confirm whether
        // lexer->eof should be consulted instead.
        indent_length = 0;
        found_end_of_line = true;
      }
      break;
    }
    lexer->advance(lexer, false);
  }

  // Layout tokens are only produced at a line boundary.
  if (!found_end_of_line) {
    return false;
  }

  const size_t depth = scanner->indent_length_stack_size;
  const uint16_t current_indent_length =
      depth == 0 ? 0 : scanner->indent_length_stack[depth - 1];

  if (depth != 0) {
    // Deeper indentation opens a new level. The capacity check prevents
    // writing past the end of the stack on extremely deep nesting; when the
    // stack is full no INDENT is produced.
    if (valid_symbols[INDENT] && indent_length > current_indent_length &&
        depth < stack_capacity) {
      scanner->indent_length_stack[depth] = (uint16_t)indent_length;
      scanner->indent_length_stack_size = depth + 1;
      lexer->result_symbol = INDENT;
      return true;
    }

    // Shallower indentation closes the innermost level. This is also done
    // when NEWLINE is not acceptable, even if DEDENT was not requested.
    if ((valid_symbols[DEDENT] || !valid_symbols[NEWLINE]) &&
        indent_length < current_indent_length) {
      scanner->indent_length_stack_size = depth - 1;
      lexer->result_symbol = DEDENT;
      return true;
    }
  }

  // Same indentation, or a context where neither INDENT nor DEDENT is
  // acceptable: report a plain line break if the parser can take one.
  if (valid_symbols[NEWLINE] &&
      (current_indent_length == indent_length ||
       (!valid_symbols[INDENT] && !valid_symbols[DEDENT]))) {
    lexer->result_symbol = NEWLINE;
    return true;
  }

  return false;
}
// Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment