Skip to content

Instantly share code, notes, and snippets.

@etscrivner
Created May 9, 2021 00:09
Show Gist options
  • Save etscrivner/9880e0a247ebdee42326b9d179268d6f to your computer and use it in GitHub Desktop.
Save etscrivner/9880e0a247ebdee42326b9d179268d6f to your computer and use it in GitHub Desktop.
Jasper is a simple markdown processor that converts a file to HTML by successively applying rewriting rules, each of which replaces one piece of markdown syntax.
// jasper.c - Processor for custom flavor of markdown called jasperstone
//
// Description:
//
// Jasperstone processes a markup file by applying a set of rewriting rules to
// the document. Each rewriting rule processes the entire document replacing a
// particular piece of syntax. The end result is a file that is completely
// marked up without the need for a complex parser.
#include <inttypes.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include "language_layer.h"
#include "memory_arena.h"
#define MemorySize Megabytes(64)
///////////////////////////////////////////////////////////////////////////////
// Builds a kebab-case copy of Name inside Arena: ASCII capitals are lowered,
// every run of spaces/tabs collapses to a single '-', and all other bytes are
// copied unchanged. The result is never longer than Name, so Name.Length bytes
// are reserved up front.
String_UTF8 KebabCaseName(String_UTF8 Name, Memory_Arena *Arena)
{
  String_UTF8 Kebab = {0};
  Kebab.Str = ArenaAlloc(Arena, Name.Length);
  Kebab.Length = 0;

  b32 InBlankRun = false;
  for (u32 Index = 0; Index < Name.Length; ++Index) {
    u8 Byte = Name.Str[Index];
    b32 IsBlank = (Byte == ' ' || Byte == '\t');
    if (IsBlank && !InBlankRun) {
      // Only the first blank of a run produces a dash.
      Kebab.Str[Kebab.Length++] = '-';
    }
    if (!IsBlank) {
      Kebab.Str[Kebab.Length++] = IsCapitalLetter(Byte) ? ToLower(Byte) : Byte;
    }
    InBlankRun = IsBlank;
  }
  return(Kebab);
}
///////////////////////////////////////////////////////////////////////////////
// Forward declaration so Rewriter_Function can name the rewriter state before
// the struct body is defined.
typedef struct Rule_Rewriter_Tag Rule_Rewriter;
// Signature of a custom rewriting pass; it receives the rewriter state.
typedef void Rewriter_Function(Rule_Rewriter*);
// Discriminator telling which member of the Rewrite_Rule union is in use.
typedef enum {
REWRITE_RULE_pair,   // delimiter pair, handled by RewritePair
REWRITE_RULE_list,   // prefixed list items, handled by RewriteList
REWRITE_RULE_custom  // arbitrary pass invoked through the Rewrite pointer
} Rewrite_Rule_Type;
// One rewriting rule: a display name, its type, and the type-specific data.
typedef struct {
char *Name;
Rewrite_Rule_Type Type;
union {
// REWRITE_RULE_pair: each occurrence of PairStr alternately becomes TagBegin
// and TagEnd.
struct {
char *PairStr;
char *TagBegin;
char *TagEnd;
} Pair;
// REWRITE_RULE_list: ItemPrefix marks an item; TagBegin/TagEnd wrap the list.
struct {
char *ItemPrefix;
char *TagBegin;
char *TagEnd;
} List;
// REWRITE_RULE_custom: function that performs the whole pass.
Rewriter_Function *Rewrite;
};
} Rewrite_Rule;
// State shared by every rewriting pass. A pass reads from Source and appends
// to Working; RewriterCompleteRule then makes the output the next Source.
typedef struct Rule_Rewriter_Tag {
String_UTF8 Source;   // text being read by the current pass
String_UTF8 Working;  // output region; Length is its capacity in bytes
u32 SourceIndex;      // byte offset of the next unread byte in Source
u32 WorkingIndex;     // number of bytes written to Working so far
Memory_Arena *Arena;  // arena that backs both Source and Working
} Rule_Rewriter;
// Creates a rewriter whose Source is the contents of FilePath (read into
// Arena) and whose Working buffer is all arena space left after it. The
// result of ArenaReadFile is not inspected here; if the read fails, Source
// keeps its zero-initialized (empty) value.
Rule_Rewriter RuleRewriterInit(char *FilePath, Memory_Arena *Arena)
{
  Rule_Rewriter Rewriter = {0};
  Rewriter.Arena = Arena;
  ArenaReadFile(FilePath, Arena, &Rewriter.Source);

  umm UsedBytes = Arena->Used;
  Rewriter.Working.Str = Arena->Base + UsedBytes;
  Rewriter.Working.Length = Arena->Size - UsedBytes;
  return(Rewriter);
}
// Finishes a rewriting pass: moves the output written to Working down to the
// start of the arena, makes it the new Source, and resets Working to the
// remaining arena space so the next pass can run.
void RewriterCompleteRule(Rule_Rewriter *Rewriter)
{
  Memory_Arena *Arena = Rewriter->Arena;
  u32 OutputLength = Rewriter->WorkingIndex;

  // Working starts right after the old Source, so whenever a pass produced
  // more bytes than it consumed the two regions overlap. memmove is required;
  // memcpy on overlapping regions is undefined behaviour.
  memmove(Arena->Base, Rewriter->Working.Str, OutputLength);
  Arena->Used = OutputLength;

  Rewriter->Source.Str = Arena->Base;
  Rewriter->Source.Length = OutputLength;
  Rewriter->SourceIndex = 0;

  Rewriter->Working.Str = Arena->Base + Arena->Used;
  Rewriter->Working.Length = Arena->Size - Arena->Used;
  Rewriter->WorkingIndex = 0;
}
// True while at least one unread byte remains in Source.
b32 RewriterHasSourceText(Rule_Rewriter *Rewriter) {
  umm SourceLength = Rewriter->Source.Length;
  return(SourceLength > Rewriter->SourceIndex);
}
// True while at least one byte of the Working buffer is still unused.
b32 RewriterHasWorkingSpace(Rule_Rewriter *Rewriter) {
  umm Capacity = Rewriter->Working.Length;
  return(Capacity > Rewriter->WorkingIndex);
}
// A pass may continue only while there is both input left to read and room
// left to write.
b32 RewriterIsValid(Rule_Rewriter *Rewriter) {
  if (!RewriterHasSourceText(Rewriter)) {
    return(false);
  }
  return(RewriterHasWorkingSpace(Rewriter));
}
// Skips Length bytes of Source without copying them. Asserts that the new
// position does not pass the end of Source.
void RewriterSourceAdvance(Rule_Rewriter *Rewriter, u32 Length) {
  u32 NewIndex = Rewriter->SourceIndex + Length;
  Assert(NewIndex <= Rewriter->Source.Length);
  Rewriter->SourceIndex = NewIndex;
}
// Copies the next UTF-8 codepoint (1-4 bytes, sized from its first byte) from
// Source to Working and advances both indices. Asserts that the whole
// codepoint is available in Source and fits in Working.
void RewriterCopyNext(Rule_Rewriter *Rewriter) {
  u32 From = Rewriter->SourceIndex;
  u32 To = Rewriter->WorkingIndex;
  Assert(From < Rewriter->Source.Length);
  Assert(To < Rewriter->Working.Length);

  u32 NumBytes = UTF8CodepointLengthBytes(Rewriter->Source.Str[From]);
  Assert(From + NumBytes <= Rewriter->Source.Length);
  Assert(To + NumBytes <= Rewriter->Working.Length);

  memcpy(Rewriter->Working.Str + To, Rewriter->Source.Str + From, NumBytes);
  Rewriter->WorkingIndex = To + NumBytes;
  Rewriter->SourceIndex = From + NumBytes;
}
// Copies the next Length codepoints (not bytes) from Source to Working, one
// RewriterCopyNext call per codepoint.
void RewriterAdvanceAndCopy(Rule_Rewriter *Rewriter, u32 Length) {
  for (u32 Remaining = Length; Remaining > 0; --Remaining) {
    RewriterCopyNext(Rewriter);
  }
}
// Reports whether the unread Source text begins with the NUL-terminated string
// Match. Returns false when fewer than strlen(Match) bytes remain.
b32 RewriterSourceMatches(Rule_Rewriter *Rewriter, char *Match) {
  u32 MatchLength = strlen(Match);
  if (Rewriter->SourceIndex + MatchLength > Rewriter->Source.Length) {
    return(false);
  }
  u8 *Cursor = Rewriter->Source.Str + Rewriter->SourceIndex;
  return(memcmp(Cursor, Match, MatchLength) == 0);
}
// Reports whether the next unread Source byte equals Ch. Returns false when
// no unread byte remains; several callers peek right after advancing, which
// may have consumed the last byte, and must not read past the end of Source.
b32 RewriterSourceMatchesCh(Rule_Rewriter *Rewriter, char Ch) {
  if (Rewriter->SourceIndex >= Rewriter->Source.Length) {
    return(false);
  }
  return(Rewriter->Source.Str[Rewriter->SourceIndex] == Ch);
}
// Appends one byte to Working. The byte is silently dropped unless at least
// one further byte of capacity would remain after writing it.
void RewriterPutc(Rule_Rewriter *Rewriter, char Ch) {
  if (Rewriter->WorkingIndex + 1 >= Rewriter->Working.Length) {
    return;
  }
  Rewriter->Working.Str[Rewriter->WorkingIndex] = Ch;
  Rewriter->WorkingIndex += 1;
}
// Appends the NUL-terminated string Text (without its terminator) to Working.
// All-or-nothing: if the text does not fit with one byte to spare, nothing is
// written.
void RewriterPut(Rule_Rewriter *Rewriter, char *Text) {
  u32 TextLength = strlen(Text);
  if (Rewriter->WorkingIndex + TextLength >= Rewriter->Working.Length) {
    return;
  }
  u8 *Destination = Rewriter->Working.Str + Rewriter->WorkingIndex;
  memcpy(Destination, Text, TextLength);
  Rewriter->WorkingIndex += TextLength;
}
// Appends printf-style formatted text to Working.
//
// Like RewriterPut this is all-or-nothing: WorkingIndex only advances when the
// whole formatted text (plus the terminator vsnprintf writes) fits. vsnprintf
// returns a negative value on error and the untruncated length on overflow;
// adding either to WorkingIndex unchecked would move it outside the buffer and
// make the next "Length - WorkingIndex" computation wrap around.
void RewriterPutf(Rule_Rewriter *Rewriter, char *Format, ...) {
  if (Rewriter->WorkingIndex >= Rewriter->Working.Length) {
    return; // no room at all
  }
  umm Available = Rewriter->Working.Length - Rewriter->WorkingIndex;

  va_list List;
  va_start(List, Format);
  int Written = vsnprintf(
    (char*)(Rewriter->Working.Str + Rewriter->WorkingIndex),
    Available,
    Format,
    List
  );
  va_end(List);

  if (Written >= 0 && (umm)Written < Available) {
    Rewriter->WorkingIndex += (u32)Written;
  }
}
// Discards consecutive spaces and tabs at the current Source position.
// Newlines are not skipped.
void RewriterSkipWhitespace(Rule_Rewriter *Rewriter) {
  while (RewriterIsValid(Rewriter)) {
    b32 AtBlank = RewriterSourceMatchesCh(Rewriter, ' ') ||
                  RewriterSourceMatchesCh(Rewriter, '\t');
    if (!AtBlank) {
      break;
    }
    RewriterSourceAdvance(Rewriter, 1);
  }
}
// If Source is positioned at a ``` fence, copies the whole fenced block
// (opening fence, contents, and closing fence when present) to Working
// unmodified so the calling pass does not rewrite anything inside it.
// Returns true when a block was copied, false when nothing was consumed.
b32 RewriterSkipCode(Rule_Rewriter *Rewriter) {
  if (!(RewriterIsValid(Rewriter) && RewriterSourceMatches(Rewriter, "```"))) {
    return(false);
  }

  RewriterAdvanceAndCopy(Rewriter, 3);
  while (RewriterIsValid(Rewriter)) {
    if (RewriterSourceMatches(Rewriter, "```")) {
      RewriterAdvanceAndCopy(Rewriter, 3);
      break;
    }
    RewriterCopyNext(Rewriter);
  }
  return(true);
}
// Applies a delimiter-pair rule: occurrences of Rule->Pair.PairStr are
// replaced alternately by TagBegin and TagEnd. Fenced code blocks are copied
// through untouched.
void RewritePair(Rule_Rewriter *Rewriter, Rewrite_Rule *Rule)
{
  char *Delimiter = Rule->Pair.PairStr;
  b32 TagIsOpen = false;

  while (RewriterIsValid(Rewriter)) {
    if (!RewriterSourceMatches(Rewriter, Delimiter)) {
      if (!RewriterSkipCode(Rewriter)) {
        RewriterCopyNext(Rewriter);
      }
      continue;
    }

    RewriterSourceAdvance(Rewriter, strlen(Delimiter));
    RewriterPut(Rewriter, TagIsOpen ? Rule->Pair.TagEnd : Rule->Pair.TagBegin);
    TagIsOpen = !TagIsOpen;
  }
}
// Wraps runs of ordinary text in <p>...</p>. A paragraph starts at the first
// byte that is not a space, tab, or newline and extends to the next blank line
// ("\n\n") or the end of the input. Text beginning with '#', "![" or '-',
// existing <p> blocks, and fenced code blocks are copied through unwrapped.
void RewriteParagraph(Rule_Rewriter *Rewriter) {
while (RewriterIsValid(Rewriter)) {
if (RewriterSourceMatchesCh(Rewriter, '#') ||
RewriterSourceMatches(Rewriter, "![") ||
RewriterSourceMatchesCh(Rewriter, '-')) {
// Titles ('#'), images ("![") and '-' list items are not wrapped in a
// paragraph: copy the rest of the line as-is. The terminating newline is
// left for the next iteration of the outer loop.
while (RewriterIsValid(Rewriter) && !RewriterSourceMatchesCh(Rewriter, '\n')) {
RewriterCopyNext(Rewriter);
}
} else if (RewriterSourceMatches(Rewriter, "<p>")) {
// Prevent wrapping a paragraph in another paragraph: copy everything up to
// and including the matching </p>.
while (RewriterIsValid(Rewriter)) {
if (RewriterSourceMatches(Rewriter, "</p>")) {
RewriterSourceAdvance(Rewriter, 4);
RewriterPut(Rewriter, "</p>");
break;
}
RewriterCopyNext(Rewriter);
}
} else if (!RewriterSourceMatchesCh(Rewriter, ' ') &&
!RewriterSourceMatchesCh(Rewriter, '\t') &&
!RewriterSourceMatchesCh(Rewriter, '\n')) {
// Start of ordinary text. A fenced code block here is copied verbatim
// instead of opening a paragraph.
if (!RewriterSkipCode(Rewriter)) {
RewriterPut(Rewriter, "<p>\n");
while (RewriterIsValid(Rewriter)) {
// A blank line ends the paragraph; both newlines are consumed and
// replaced by the closing text written below.
if (RewriterSourceMatches(Rewriter, "\n\n")) {
RewriterSourceAdvance(Rewriter, 2);
break;
}
RewriterCopyNext(Rewriter);
}
RewriterPut(Rewriter, "\n</p>\n\n");
}
} else {
// Whitespace between blocks is copied unchanged.
if (!RewriterSkipCode(Rewriter)) {
RewriterCopyNext(Rewriter);
}
}
}
}
// Converts markdown titles to HTML headings: a run of N '#' characters
// followed by text up to the end of the line becomes <hN>text</hN>. The
// newline itself is left in place. Fenced code blocks are copied untouched.
void RewriteTitle(Rule_Rewriter *Rewriter) {
  while (RewriterIsValid(Rewriter)) {
    if (RewriterSourceMatchesCh(Rewriter, '#')) {
      // The number of consecutive '#' gives the heading level.
      u32 TitleDepth = 0;
      while (RewriterIsValid(Rewriter) && RewriterSourceMatchesCh(Rewriter, '#')) {
        ++TitleDepth;
        RewriterSourceAdvance(Rewriter, 1);
      }
      RewriterSkipWhitespace(Rewriter);

      // TitleDepth is unsigned, so it is printed with %u rather than %d to
      // keep the conversion specifier and the argument type in agreement.
      RewriterPutf(Rewriter, "<h%u>", (unsigned)TitleDepth);
      while (RewriterIsValid(Rewriter) && !RewriterSourceMatchesCh(Rewriter, '\n')) {
        RewriterCopyNext(Rewriter);
      }
      RewriterPutf(Rewriter, "</h%u>", (unsigned)TitleDepth);
    } else {
      if (!RewriterSkipCode(Rewriter)) {
        RewriterCopyNext(Rewriter);
      }
    }
  }
}
// Converts [text](url) into <a href="url" target="_blank">text</a>.
// Image syntax ("![...") and fenced code blocks are copied through untouched.
//
// The link is validated by look-ahead before anything is consumed. A '[' that
// is not part of a complete [text](url) construct (no ']', no '(' right after
// it, or no closing ')') is copied literally instead of tripping an assertion
// or running past the end of the input.
void RewriteExternalLink(Rule_Rewriter *Rewriter) {
  while (RewriterIsValid(Rewriter)) {
    if (RewriterSourceMatches(Rewriter, "![")) {
      // Skip image stuff: copy up to (not including) the closing ')'.
      while (RewriterIsValid(Rewriter) && !RewriterSourceMatchesCh(Rewriter, ')')) {
        RewriterCopyNext(Rewriter);
      }
    } else if (RewriterSourceMatchesCh(Rewriter, '[')) {
      u8 *Source = Rewriter->Source.Str;
      umm SourceEnd = Rewriter->Source.Length;

      // Link text: everything between '[' and the next ']'.
      umm TextBegin = (umm)Rewriter->SourceIndex + 1;
      umm TextEnd = TextBegin;
      while (TextEnd < SourceEnd && Source[TextEnd] != ']') {
        ++TextEnd;
      }

      // URL: must start with '(' immediately after ']' and end with ')'.
      umm URLBegin = TextEnd + 2;
      umm URLEnd = URLBegin;
      b32 IsLink = (TextEnd + 1 < SourceEnd) && (Source[TextEnd + 1] == '(');
      if (IsLink) {
        while (URLEnd < SourceEnd && Source[URLEnd] != ')') {
          ++URLEnd;
        }
        IsLink = (URLEnd < SourceEnd);
      }

      if (IsLink) {
        // Consume through the closing ')'.
        RewriterSourceAdvance(Rewriter, (u32)(URLEnd + 1 - Rewriter->SourceIndex));
        RewriterPutf(
          Rewriter, "<a href=\"%.*s\" target=\"_blank\">%.*s</a>",
          (int)(URLEnd - URLBegin),
          (char*)(Source + URLBegin),
          (int)(TextEnd - TextBegin),
          (char*)(Source + TextBegin)
        );
      } else {
        // Not a link after all: keep the '[' as ordinary text.
        RewriterCopyNext(Rewriter);
      }
    } else {
      if (!RewriterSkipCode(Rewriter)) {
        RewriterCopyNext(Rewriter);
      }
    }
  }
}
// Writes Name to Working with each run of spaces/tabs replaced by a single
// '-'. Blanks before the first non-blank byte produce no dash. Unlike
// KebabCaseName, letter case is left unchanged.
void RewriterKebabCaseName(Rule_Rewriter *Rewriter, String_UTF8 Name)
{
  // A dash may be emitted only after a non-blank byte has been written since
  // the previous dash.
  b32 DashAllowed = false;
  for (u32 Index = 0; Index < Name.Length; ++Index) {
    u8 Byte = Name.Str[Index];
    if (Byte == ' ' || Byte == '\t') {
      if (DashAllowed) {
        RewriterPutc(Rewriter, '-');
        DashAllowed = false;
      }
      continue;
    }
    RewriterPutc(Rewriter, Byte);
    DashAllowed = true;
  }
}
// Converts internal links to anchors pointing at "<kebab-page-name>.html":
//
//   {{ Page Name }}         -> <a href="Page-Name.html">Page Name</a>
//   {{ Page Name }}{text}   -> <a href="Page-Name.html">text</a>
//
// The construct is validated by look-ahead before anything is consumed:
//  - an unterminated "{{" is copied literally instead of failing an assertion;
//  - blanks on both sides of the page name are ignored, so "{{ Name }}" does
//    not yield a trailing '-' in the href;
//  - the optional {text} part is only used when it has a closing '}' and is
//    not the start of another "{{" link;
//  - no byte beyond the end of Source is inspected.
// Fenced code blocks are copied through untouched.
void RewriteInternalLink(Rule_Rewriter *Rewriter) {
  while (RewriterIsValid(Rewriter)) {
    if (RewriterSourceMatches(Rewriter, "{{")) {
      u8 *Source = Rewriter->Source.Str;
      umm SourceEnd = Rewriter->Source.Length;

      // Page name begins after "{{" and any leading blanks.
      umm NameBegin = (umm)Rewriter->SourceIndex + 2;
      while (NameBegin < SourceEnd &&
             (Source[NameBegin] == ' ' || Source[NameBegin] == '\t')) {
        ++NameBegin;
      }

      // Find the closing "}}".
      umm NameEnd = NameBegin;
      while (NameEnd + 1 < SourceEnd &&
             !(Source[NameEnd] == '}' && Source[NameEnd + 1] == '}')) {
        ++NameEnd;
      }
      if (NameEnd + 1 >= SourceEnd) {
        // Unterminated link: keep the brace as ordinary text.
        RewriterCopyNext(Rewriter);
        continue;
      }
      umm After = NameEnd + 2;

      // Drop trailing blanks from the page name.
      umm TrimmedEnd = NameEnd;
      while (TrimmedEnd > NameBegin &&
             (Source[TrimmedEnd - 1] == ' ' || Source[TrimmedEnd - 1] == '\t')) {
        --TrimmedEnd;
      }

      String_UTF8 PageName = {0};
      PageName.Str = Source + NameBegin;
      PageName.Length = TrimmedEnd - NameBegin;

      // By default the page name doubles as the link text.
      String_UTF8 LinkText = PageName;

      // Allow for optional link text to follow as {text}.
      b32 StartsAnotherLink = (After + 1 < SourceEnd) && (Source[After + 1] == '{');
      if (After < SourceEnd && Source[After] == '{' && !StartsAnotherLink) {
        umm TextBegin = After + 1;
        umm TextEnd = TextBegin;
        while (TextEnd < SourceEnd && Source[TextEnd] != '}') {
          ++TextEnd;
        }
        if (TextEnd < SourceEnd) {
          LinkText.Str = Source + TextBegin;
          LinkText.Length = TextEnd - TextBegin;
          After = TextEnd + 1;
        }
      }

      RewriterSourceAdvance(Rewriter, (u32)(After - Rewriter->SourceIndex));
      RewriterPut(Rewriter, "<a href=\"");
      RewriterKebabCaseName(Rewriter, PageName);
      RewriterPutf(Rewriter, ".html\">%.*s</a>", (int)LinkText.Length, (char*)LinkText.Str);
    } else {
      if (!RewriterSkipCode(Rewriter)) {
        RewriterCopyNext(Rewriter);
      }
    }
  }
}
// Converts ![alt](src) into <img src="src" alt="alt" />. Fenced code blocks
// are copied through untouched.
//
// The construct is validated by look-ahead before anything is consumed. A
// "![" that is not part of a complete ![alt](src) (no ']', no '(' right after
// it, or no closing ')') is copied literally instead of tripping an assertion
// or running past the end of the input.
void RewriteImage(Rule_Rewriter *Rewriter) {
  while (RewriterIsValid(Rewriter)) {
    if (RewriterSourceMatches(Rewriter, "![")) {
      u8 *Source = Rewriter->Source.Str;
      umm SourceEnd = Rewriter->Source.Length;

      // Alt text: everything between "![" and the next ']'.
      umm AltBegin = (umm)Rewriter->SourceIndex + 2;
      umm AltEnd = AltBegin;
      while (AltEnd < SourceEnd && Source[AltEnd] != ']') {
        ++AltEnd;
      }

      // Image source: must start with '(' right after ']' and end with ')'.
      umm SrcBegin = AltEnd + 2;
      umm SrcEnd = SrcBegin;
      b32 IsImage = (AltEnd + 1 < SourceEnd) && (Source[AltEnd + 1] == '(');
      if (IsImage) {
        while (SrcEnd < SourceEnd && Source[SrcEnd] != ')') {
          ++SrcEnd;
        }
        IsImage = (SrcEnd < SourceEnd);
      }

      if (IsImage) {
        // Consume through the closing ')'.
        RewriterSourceAdvance(Rewriter, (u32)(SrcEnd + 1 - Rewriter->SourceIndex));
        RewriterPutf(
          Rewriter,
          "<img src=\"%.*s\" alt=\"%.*s\" />",
          (int)(SrcEnd - SrcBegin),
          (char*)(Source + SrcBegin),
          (int)(AltEnd - AltBegin),
          (char*)(Source + AltBegin)
        );
      } else {
        // Not an image after all: keep the '!' as ordinary text.
        RewriterCopyNext(Rewriter);
      }
    } else {
      if (!RewriterSkipCode(Rewriter)) {
        RewriterCopyNext(Rewriter);
      }
    }
  }
}
void RewriteList(Rule_Rewriter *Rewriter, Rewrite_Rule *Rule) {
while (RewriterIsValid(Rewriter)) {
if (RewriterSourceMatches(Rewriter, Rule->List.ItemPrefix)) {
RewriterPutf(Rewriter, "%s\n", Rule->List.TagBegin);
while (RewriterIsValid(Rewriter)) {
if (RewriterSourceMatches(Rewriter, Rule->List.ItemPrefix)) {
RewriterSourceAdvance(Rewriter, 1);
RewriterPut(Rewriter, "<li>");
RewriterSkipWhitespace(Rewriter);
} else if (RewriterSourceMatchesCh(Rewriter, '\n')) {
RewriterPut(Rewriter, "</li>\n");
if (RewriterSourceMatches(Rewriter, "\n\n")) {
RewriterSourceAdvance(Rewriter, 2);
break;
} else {
RewriterSourceAdvance(Rewriter, 1);
}
} else {
RewriterCopyNext(Rewriter);
}
}
RewriterPutf(Rewriter, "%s\n\n", Rule->List.TagEnd);
} else {
if (!RewriterSkipCode(Rewriter)) {
RewriterCopyNext(Rewriter);
}
}
}
}
// Replaces ``` fences with <code> / </code>: the first fence opens a code
// element, the next one closes it, and so on. Everything else, including the
// code text itself, is copied unchanged. An unterminated block gets no
// closing tag.
void RewriteCode(Rule_Rewriter *Rewriter) {
  b32 InsideCode = false;
  while (RewriterIsValid(Rewriter)) {
    if (!RewriterSourceMatches(Rewriter, "```")) {
      RewriterCopyNext(Rewriter);
      continue;
    }

    if (InsideCode) {
      RewriterSourceAdvance(Rewriter, 3);
      RewriterPut(Rewriter, "</code>");
    } else {
      RewriterPut(Rewriter, "<code>");
      RewriterSourceAdvance(Rewriter, 3);
    }
    InsideCode = !InsideCode;
  }
}
// Table of rewriting rules. JasperstoneProcessFile applies them in array
// order, each as a full pass over the document, so the order matters: "bold"
// ("**") precedes "italic" ("*") so that the two-character delimiter is
// consumed before the one-character rule runs.
static Rewrite_Rule Rules[] = {
{ "bold", REWRITE_RULE_pair, .Pair = { "**", "<strong>", "</strong>" } },
{ "italic", REWRITE_RULE_pair, .Pair = { "*", "<em>", "</em>" } },
{ "strike", REWRITE_RULE_pair, .Pair = { "~", "<del>", "</del>" } },
{ "underline", REWRITE_RULE_pair, .Pair = { "_", "<u>", "</u>" } },
{ "paragraph", REWRITE_RULE_custom, .Rewrite = RewriteParagraph },
{ "title", REWRITE_RULE_custom, .Rewrite = RewriteTitle },
{ "image", REWRITE_RULE_custom, .Rewrite = RewriteImage },
{ "unordered list", REWRITE_RULE_list, .List = { "-", "<ul>", "</ul>" } },
{ "ordered list", REWRITE_RULE_list, .List = { "+", "<ol>", "</ol>" } },
{ "external links", REWRITE_RULE_custom, .Rewrite = RewriteExternalLink },
{ "internal links", REWRITE_RULE_custom, .Rewrite = RewriteInternalLink },
// The other passes use RewriterSkipCode, which recognizes the raw ``` fences,
// to leave code untouched; the fences must therefore survive until the end.
{ "code", REWRITE_RULE_custom, .Rewrite = RewriteCode }, // code should be applied last
};
// Reads FilePath into Arena and runs every entry of Rules over it in order,
// one full pass per rule. Returns the final text, which lives at the start of
// the arena.
String_UTF8 JasperstoneProcessFile(char *FilePath, Memory_Arena *Arena)
{
  Rule_Rewriter Rewriter = RuleRewriterInit(FilePath, Arena);

  for (u32 Index = 0; Index < ArrayCount(Rules); ++Index) {
    Rewrite_Rule *Rule = Rules + Index;
    if (Rule->Type == REWRITE_RULE_pair) {
      RewritePair(&Rewriter, Rule);
    } else if (Rule->Type == REWRITE_RULE_list) {
      RewriteList(&Rewriter, Rule);
    } else if (Rule->Type == REWRITE_RULE_custom) {
      Rule->Rewrite(&Rewriter);
    }
    // The output of this pass becomes the input of the next one.
    RewriterCompleteRule(&Rewriter);
  }
  return(Rewriter.Source);
}
#ifdef JASPERSTONE_MAIN
// Applies every rule 100 times in a row and prints the result. Re-applying a
// rule to its own output is expected to be safe (to produce no further
// change), which can be checked by inspecting the printed document.
void RewriterTestSuite(Rule_Rewriter *Rewriter)
{
  for (u32 RuleIndex = 0; RuleIndex < ArrayCount(Rules); ++RuleIndex) {
    Rewrite_Rule *Rule = Rules + RuleIndex;
    for (u32 Pass = 0; Pass < 100; ++Pass) {
      switch (Rule->Type) {
        case REWRITE_RULE_pair:
          RewritePair(Rewriter, Rule);
          RewriterCompleteRule(Rewriter);
          break;
        case REWRITE_RULE_list:
          RewriteList(Rewriter, Rule);
          RewriterCompleteRule(Rewriter);
          break;
        case REWRITE_RULE_custom:
          Rule->Rewrite(Rewriter);
          RewriterCompleteRule(Rewriter);
          break;
        default:
          break;
      }
    }
  }
  printf("%.*s", (int)Rewriter->Source.Length, Rewriter->Source.Str);
}
// Command-line entry point: processes the file named by the first argument
// and prints the resulting HTML to stdout. Returns 0 on success and 1 on a
// usage error or when the working memory cannot be allocated.
int main(int argc, char** argv) {
  if (argc < 2) {
    fprintf(stderr, "USAGE: jaspdown FILE\n");
    return(1);
  }

  char *FilePath = argv[1];
  u8 *Memory = (u8*)calloc(1, MemorySize);
  if (Memory == NULL) {
    // Without this check the arena would be built on a NULL base pointer and
    // the first read or write would crash.
    fprintf(stderr, "jaspdown: unable to allocate %lld bytes\n", (long long)MemorySize);
    return(1);
  }
  Memory_Arena Arena = ArenaInit(Memory, MemorySize);
#if 0
  Rule_Rewriter Rewriter = RuleRewriterInit(FilePath, &Arena);
  RewriterTestSuite(&Rewriter);
#else
  String_UTF8 Result = JasperstoneProcessFile(FilePath, &Arena);
  printf("%.*s", (int)Result.Length, Result.Str);
#endif
  free(Memory);
  return(0);
}
#endif // JASPERSTONE_MAIN
#ifndef LANGUAGE_LAYER_H
#define LANGUAGE_LAYER_H
#include <assert.h>
#include <inttypes.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
// Short aliases for the fixed-width integer and floating-point types.
typedef int8_t i8;
typedef uint8_t u8;
typedef int16_t i16;
typedef uint16_t u16;
typedef int32_t i32;
typedef uint32_t u32;
typedef int64_t i64;
typedef uint64_t u64;
// Unsigned integer wide enough to hold a pointer; used for sizes and lengths.
typedef uintptr_t umm;
typedef float f32;
typedef double f64;
// Boolean types of explicit width (signed integers holding true/false).
typedef i8 b8;
typedef i16 b16;
typedef i32 b32;
// Assert forwards to the standard assert, so it is compiled out by NDEBUG.
#define Assert(X) assert(X)
// Max/Min evaluate each argument twice; do not pass expressions with side
// effects.
#define Max(X, Y) (((X) > (Y)) ? (X) : (Y))
#define Min(X, Y) (((X) < (Y)) ? (X) : (Y))
// Element count of a true array (not of a pointer).
#define ArrayCount(Array) ((sizeof(Array))/(sizeof((Array)[0])))
// Byte-size helpers; the LL suffix makes the results long long.
#define Kilobytes(Value) ((Value)*1024LL)
#define Megabytes(Value) (Kilobytes(Value)*1024LL)
#define Gigabytes(Value) (Megabytes(Value)*1024LL)
// Sets SizeBytes bytes starting at Memory to zero.
void ZeroMemory(u8 *Memory, umm SizeBytes)
{
memset(Memory, 0, SizeBytes);
}
// Zeroes an lvalue in place. NOTE(review): the expansion already ends with a
// ';' and Value is not parenthesized, so this cannot be used as the body of an
// if/else without braces or with a non-trivial expression as its argument.
#define ClearMemory(Value) ZeroMemory((u8*)&Value, sizeof(Value));
///////////////////////////////////////////////////////////////////////////////
// True when Ch is an ASCII uppercase letter ('A'..'Z').
b32 IsCapitalLetter(char Ch) {
  return('A' <= Ch && Ch <= 'Z');
}
// Maps an ASCII uppercase letter to its lowercase form (the two differ by
// 0x20); any other character is returned unchanged.
char ToLower(char Ch) {
  return(IsCapitalLetter(Ch) ? (char)(Ch + 0x20) : Ch);
}
///////////////////////////////////////////////////////////////////////////////
// Pointer-plus-length string. The bytes are addressed through either member
// of the union; code in this file does not rely on a NUL terminator.
typedef struct {
union {
void *Data;  // untyped view of the bytes
u8 *Str;     // byte view of the same memory
};
umm Length;  // number of bytes
} String_UTF8;
// Wraps a NUL-terminated C string in a String_UTF8 without copying it; the
// result points at Value's own bytes and excludes the terminator.
String_UTF8 CreateStringUTF8(char *Value)
{
  String_UTF8 Wrapped = {0};
  Wrapped.Length = strlen(Value);
  Wrapped.Str = (u8*)Value;
  return(Wrapped);
}
// Compares Left with the NUL-terminated string Str. Returns 0 when they are
// byte-for-byte equal, 1 when the lengths differ, and the memcmp result
// otherwise.
i32 UTF8CompareCStr(String_UTF8* Left, char* Str)
{
  u32 StrLength = strlen(Str);
  if (Left->Length != StrLength) {
    return(1);
  }
  return(memcmp(Left->Str, Str, StrLength));
}
// Compares two strings. Returns 0 when they are byte-for-byte equal, 1 when
// the lengths differ, and the memcmp result otherwise.
i32 UTF8Compare(String_UTF8* Left, String_UTF8* Right)
{
  if (Left->Length != Right->Length) {
    return(1);
  }
  return(memcmp(Left->Str, Right->Str, Left->Length));
}
// True when the last strlen(Postfix) bytes of Str equal Postfix.
b32 UTF8EndsWith(String_UTF8* Str, char *Postfix)
{
  umm PostfixLength = strlen(Postfix);
  if (Str->Length < PostfixLength) {
    return(false);
  }
  u8 *Tail = Str->Str + (Str->Length - PostfixLength);
  return(memcmp(Tail, Postfix, PostfixLength) == 0);
}
// Returns the encoded length in bytes of the UTF-8 sequence whose first byte
// is Ch, judged from its high bits: 0xxxxxxx -> 1, 110xxxxx -> 2,
// 1110xxxx -> 3, anything else -> 4.
u32 UTF8CodepointLengthBytes(char Ch)
{
  unsigned char Lead = (unsigned char)Ch;
  if ((Lead & 0x80) == 0x00) {
    return(1);
  }
  if ((Lead & 0xE0) == 0xC0) {
    return(2);
  }
  if ((Lead & 0xF0) == 0xE0) {
    return(3);
  }
  return(4);
}
// Returns a pointer to the first byte of the character preceding Ptr, or NULL
// when there is none at or after Start.
char* UTF8PreviousChar(char *Ptr, char *Start)
{
  // Walk backwards over continuation bytes (bit 7 set, bit 6 clear) and stop
  // at the first byte that is not one: either a single-byte character or the
  // lead byte of a multibyte sequence.
  while (Ptr > Start) {
    --Ptr;
    if ((*Ptr & 0xC0) != 0x80) {
      return(Ptr);
    }
  }
  return NULL;
}
///////////////////////////////////////////////////////////////////////////////
// Cursor over a String_UTF8.
typedef struct {
String_UTF8 *Data;  // string being traversed
u8 *At;             // current position
u8 *End;            // one past the last byte of Data
u8 *Peek;           // position of the character following At
} Text_Stream;
// Creates a stream positioned at the first byte of Data, with Peek pointing
// at the following character. For an empty string nothing is dereferenced:
// At, Peek and End all refer to the same position and the stream is
// immediately invalid.
Text_Stream StreamInit(String_UTF8 *Data)
{
  Text_Stream Result = {0};
  Result.Data = Data;
  Result.At = Data->Str;
  Result.End = Data->Str + Data->Length;
  Result.Peek = Result.At;
  if (Data->Length > 0) {
    // Only read the first byte when there is one.
    Result.Peek = Result.At + UTF8CodepointLengthBytes(*Result.At);
  }
  return(Result);
}
// True while the cursor lies inside the underlying string, i.e. in
// [Data->Str, End).
b32 IsValid(Text_Stream *Stream)
{
  if (Stream->At < Stream->Data->Str) {
    return(false);
  }
  return(Stream->At < Stream->End);
}
// Moves the cursor forward by NumBytes. Peek is refreshed only when the new
// position is still inside the string.
void StreamAdvance(Text_Stream *Stream, u32 NumBytes)
{
  Stream->At += NumBytes;
  if (!IsValid(Stream)) {
    return;
  }
  Stream->Peek = Stream->At + UTF8CodepointLengthBytes(*Stream->At);
}
// Moves the cursor backward by NumBytes. Peek is refreshed only when the new
// position is still inside the string.
void StreamReverse(Text_Stream *Stream, u32 NumBytes)
{
  Stream->At -= NumBytes;
  if (!IsValid(Stream)) {
    return;
  }
  Stream->Peek = Stream->At + UTF8CodepointLengthBytes(*Stream->At);
}
// Steps the cursor to the next character (the old Peek) and returns the new
// position. Does nothing when the stream is already invalid.
//
// As in StreamAdvance, Peek is recomputed only while the new position is
// still inside the string; stepping off the last character must not read the
// byte at End.
u8* StreamNext(Text_Stream *Stream)
{
  if (IsValid(Stream)) {
    Stream->At = Stream->Peek;
    if (IsValid(Stream)) {
      Stream->Peek = Stream->At + UTF8CodepointLengthBytes(*Stream->At);
    }
  }
  return(Stream->At);
}
// Steps the cursor back by one character: Peek takes the old position and At
// moves to the preceding character (NULL when there is none, which makes the
// stream invalid). Returns Peek, i.e. the position the cursor had on entry
// when the stream was valid.
u8* StreamPrev(Text_Stream *Stream)
{
  if (!IsValid(Stream)) {
    return(Stream->Peek);
  }
  u8 *Current = Stream->At;
  Stream->Peek = Current;
  Stream->At = (u8*)UTF8PreviousChar((char*)Current, (char*)Stream->Data->Str);
  return(Stream->Peek);
}
// True when the byte at the cursor equals Ch. The cursor is dereferenced
// unconditionally.
b32 StreamCurrentMatchesCh(Text_Stream *Stream, char Ch)
{
  u8 Current = *Stream->At;
  return(Current == Ch);
}
// True when the text at the cursor begins with the NUL-terminated string Str.
// Returns false, without reading anything, when fewer than strlen(Str) bytes
// remain before End; comparing blindly would read past the end of the string.
b32 StreamCurrentMatches(Text_Stream *Stream, char *Str)
{
  umm Length = strlen(Str);
  umm Remaining = IsValid(Stream) ? (umm)(Stream->End - Stream->At) : 0;
  if (Length > Remaining) {
    return(false);
  }
  return(memcmp(Stream->At, Str, Length) == 0);
}
// True when the byte at the cursor is an ASCII letter.
b32 StreamIsAlpha(Text_Stream *Stream)
{
  b32 IsLower = (*Stream->At >= 'a' && *Stream->At <= 'z');
  b32 IsUpper = (*Stream->At >= 'A' && *Stream->At <= 'Z');
  return(IsLower || IsUpper);
}
// True when the byte at the cursor is an ASCII decimal digit.
b32 StreamIsDigit(Text_Stream *Stream)
{
  u8 Current = *Stream->At;
  return ('0' <= Current && Current <= '9');
}
// True when a number starts at the cursor: either a digit, or a '+'/'-' sign
// whose following byte (at Peek) is a digit or '.'.
b32 StreamIsNumeric(Text_Stream *Stream)
{
  if (StreamIsDigit(Stream)) {
    return(true);
  }
  u8 Current = *Stream->At;
  if (Current != '-' && Current != '+') {
    return(false);
  }
  // Peek is only consulted after a sign was seen.
  u8 Next = *Stream->Peek;
  return((Next >= '0' && Next <= '9') || Next == '.');
}
// True when the two-byte token ":=" starts at the cursor.
b32 StreamIsEquals(Text_Stream *Stream)
{
  if (*Stream->At != ':') {
    return(false);
  }
  return (*Stream->Peek == '=');
}
// True when the byte at the cursor is a space or a tab (newlines excluded).
b32 StreamIsWhitespace(Text_Stream *Stream)
{
  u8 Current = *Stream->At;
  return(Current == ' ' || Current == '\t');
}
// True when a line break starts at the cursor: "\n", or "\r" followed by
// "\n" at Peek.
b32 StreamIsNewline(Text_Stream *Stream)
{
  if (*Stream->At == '\n') {
    return(true);
  }
  return(*Stream->At == '\r' && *Stream->Peek == '\n');
}
#endif // LANGUAGE_LAYER_H
#ifndef MEMORY_ARENA_H
#define MEMORY_ARENA_H
#include "language_layer.h"
#include <stdarg.h>
// Linear (bump) allocator over a caller-supplied block of memory.
typedef struct Memory_Arena {
u8 *Base;                     // start of the backing memory
umm Size;                     // capacity in bytes
umm Used;                     // bytes handed out so far
u32 ID;                       // index of this arena among its parent's children
u32 NumChildren;              // child arenas currently pushed onto this one
u32 TempCount;                // not used by any code visible in this file
struct Memory_Arena *Parent;  // arena this one was carved from; NULL for a root
} Memory_Arena;
// Typed allocation helpers built on ArenaAlloc.
// ArenaPushStruct reserves room for one Type_; ArenaPushArray reserves room
// for Count_ consecutive Type_ values. Count_ is parenthesized so that an
// expression such as "n + 1" is multiplied as a whole, and each expansion is
// wrapped in parentheses so it behaves as a single expression.
#define ArenaPushStruct(Arena_, Type_) ((Type_*)ArenaAlloc(Arena_, sizeof(Type_)))
#define ArenaPushArray(Arena_, Count_, Type_) ((Type_*)ArenaAlloc(Arena_, sizeof(Type_) * (Count_)))
// Creates an empty root arena over the SizeBytes bytes starting at Base. The
// arena does not take ownership of the memory.
Memory_Arena ArenaInit(u8 *Base, umm SizeBytes)
{
  // "{0}" rather than "{}": the empty initializer is not valid ISO C before
  // C23, and the rest of this file uses {0}.
  Memory_Arena Result = {0};
  Result.Base = Base;
  Result.Size = SizeBytes;
  Result.Used = 0;
  Result.ID = 0;
  Result.NumChildren = 0;
  Result.TempCount = 0;
  Result.Parent = NULL;
  return(Result);
}
// Reserves SizeBytes bytes at the end of the arena and returns a pointer to
// them. The memory is not cleared. Running out of space is a programming
// error reported through Assert.
u8* ArenaAlloc(Memory_Arena* Arena, umm SizeBytes)
{
  // Compare against the remaining space instead of computing Used + SizeBytes,
  // which could wrap around for a huge request and slip past the check.
  Assert(Arena->Used <= Arena->Size);
  Assert(SizeBytes <= Arena->Size - Arena->Used);
  u8* Result = Arena->Base + Arena->Used;
  Arena->Used += SizeBytes;
  return(Result);
}
// Releases the most recently allocated SizeBytes bytes by moving the
// allocation mark back. Asserts that at least that many bytes are in use.
void ArenaFree(Memory_Arena *Arena, umm SizeBytes)
{
  umm UsedBytes = Arena->Used;
  Assert(UsedBytes >= SizeBytes);
  Arena->Used = UsedBytes - SizeBytes;
}
// Formats text printf-style into the free space at the end of the arena and
// returns a pointer to it. Used grows by the formatted length, capped at the
// space that was available; the terminator written by vsnprintf is not
// counted when the text fits, so a following allocation starts on top of it.
char* ArenaSnprintf(Memory_Arena *Arena, char *Format, ...)
{
  umm Available = Arena->Size - Arena->Used;
  char *String = (char*)(Arena->Base + Arena->Used);

  va_list Args;
  va_start(Args, Format);
  i32 Written = vsnprintf(String, Available, Format, Args);
  va_end(Args);

  Assert(Written >= 0); // Crash on error result
  Arena->Used += (Written <= Available) ? (umm)Written : Available;
  return(String);
}
// Reads the whole file FileName into memory allocated from Arena and stores
// the result in *Str.
//
// Returns true on success. Returns false, leaving *Str untouched and the
// arena at its previous mark, when the file cannot be opened, its size
// cannot be determined, or it cannot be read completely.
b32 ArenaReadFile(const char *FileName, Memory_Arena *Arena, String_UTF8 *Str)
{
  b32 Result = false;
  FILE *File = fopen(FileName, "rb");
  if (File != NULL) {
    // ftell reports failure as -1; that must not be turned into a length.
    long FileSize = -1;
    if (fseek(File, 0, SEEK_END) == 0) {
      FileSize = ftell(File);
    }
    if (FileSize >= 0 && fseek(File, 0, SEEK_SET) == 0) {
      umm Length = (umm)FileSize;
      u8 *Buffer = ArenaAlloc(Arena, Length);
      // fread returns 0 items for an empty file, which is still a success.
      if (Length == 0 || fread(Buffer, Length, 1, File) == 1) {
        Str->Str = Buffer;
        Str->Length = Length;
        Result = true;
      } else {
        // Short read: give the space back.
        ArenaFree(Arena, Length);
      }
    }
    fclose(File);
  }
  return(Result);
}
// Writes the used portion of the arena (Base .. Base + Used) to FileName,
// replacing any existing file. Returns true only when the file was opened,
// all bytes were written, and the file was closed without error.
b32 ArenaWriteFile(Memory_Arena *Arena, char *FileName)
{
  b32 Result = true;
  FILE *DotFile = fopen(FileName, "wb");
  if (DotFile) {
    // An empty arena legitimately writes nothing; fwrite would report 0 items.
    if (Arena->Used > 0 && fwrite(Arena->Base, Arena->Used, 1, DotFile) != 1) {
      Result = false;
    }
    // fclose flushes buffered data, so its failure is a write failure too.
    if (fclose(DotFile) != 0) {
      Result = false;
    }
  } else {
    Result = false;
  }
  return(Result);
}
///////////////////////////////////////////////////////////////////////////////
// child arenas
// ArenaPushChild pushes a child memory arena of Size bytes onto the Parent
// arena. The child's ID is its position among the parent's live children,
// which ArenaPopChild uses to check that children are popped in reverse
// order.
Memory_Arena ArenaPushChild(Memory_Arena *Parent, umm Size)
{
  // "{0}" rather than "{}": the empty initializer is not valid ISO C before
  // C23, and the rest of this file uses {0}.
  Memory_Arena Result = {0};
  Result.Base = ArenaAlloc(Parent, Size);
  Result.Size = Size;
  Result.Used = 0;
  Result.Parent = Parent;
  Result.ID = Parent->NumChildren;
  Result.NumChildren = 0;
  Parent->NumChildren++;
  return(Result);
}
// ArenaPopChild removes the child arena from its parent, restoring the space
// it occupied. Only the most recently pushed child may be popped (asserted).
// The bytes the child had used are zeroed and the child is detached.
void ArenaPopChild(Memory_Arena *Child)
{
  Memory_Arena *Owner = Child->Parent;
  assert(Owner);
  assert((Owner->NumChildren - 1) == Child->ID);

  // Give the child's block back to the parent.
  Owner->Used -= Child->Size;

  // Zero out all used memory for later allocations
  u8 *ChildStart = Owner->Base + Owner->Used;
  ZeroMemory(ChildStart, Child->Used);

  Child->Parent = NULL;
  Child->ID = 0;
  Child->Used = 0;
  --Owner->NumChildren;
}
#endif // MEMORY_ARENA_H
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment