Skip to content

Instantly share code, notes, and snippets.

@kkestell
Last active January 31, 2024 12:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kkestell/2fb1f5d5bbc4bf00ed594d8f78ed6ccb to your computer and use it in GitHub Desktop.
Save kkestell/2fb1f5d5bbc4bf00ed594d8f78ed6ccb to your computer and use it in GitHub Desktop.
#include "utf8.h"
#include <stdio.h>
#include <stdlib.h>
/*
 * Converter state shared by every function in this file.
 *   ib - start of the NUL-terminated input buffer (allocated by load())
 *   it - read cursor into ib; read by peek(), advanced by pop()
 *   ob - output buffer; appended to (and grown) by put_codepoint()
 */
utf8_int8_t *ib, *it, *ob;
/* len: number of bytes currently stored in ob; cap: bytes allocated for ob. */
size_t len = 0, cap = 0;

/*
 * Declared with (void) so they are real prototypes: with an empty
 * parameter list the compiler would not diagnose stray arguments.
 */

/* Inline content up to the end of the current line. */
void parse_text(void);
/* Block-level constructs; main() picks one from the first character of a line. */
void parse_heading(void);
void parse_unordered_list(void);
void parse_ordered_list(void);
void parse_code_block(void);
void parse_blockquote(void);
void parse_horizontal_rule(void);
void parse_paragraph(void);

/* Reads the whole input file into ib and prepares it/ob. */
void load(const char *filename);
/* Writes the first len bytes of ob to the output file. */
void save(const char *filename);

/* Returns the code point at the cursor without consuming it. */
utf8_int32_t peek(void);
/* Advances the cursor by n code points. */
void pop(int n);
/* Advances the cursor past any run of ' ' characters. */
void skip_whitespace(void);

/* Appends one code point, UTF-8 encoded, to the output buffer. */
void put_codepoint(utf8_int32_t codepoint);
/* Appends every character of a NUL-terminated string to the output buffer. */
void put_string(const char *str);
int main(int argc, char *argv[])
{
if (argc < 3)
{
printf("Usage: %s <input_file> <output_file>\n", argv[0]);
return 1;
}
load(argv[1]);
while (1)
{
utf8_int32_t codepoint = peek();
if (codepoint == 0)
{
break;
}
switch (codepoint)
{
case '#':
parse_heading();
break;
case '*':
parse_unordered_list();
break;
case '1':
parse_ordered_list();
break;
case '`':
parse_code_block();
break;
case '>':
parse_blockquote();
break;
case '-':
parse_horizontal_rule();
break;
default:
parse_paragraph();
break;
}
while (peek() == '\n')
{
pop(1);
}
}
save(argv[2]);
free(ib);
free(ob);
return 0;
}
void parse_text()
{
while (peek() != '\n' && peek() != 0)
{
if (peek() == '*')
{
pop(1); // *
if (peek() == '*')
{
pop(1); // *
put_string("<strong>");
while (!(it[0] == '*' && it[1] == '*') && peek() != '\n' && peek() != 0)
{
put_codepoint(peek());
pop(1);
}
pop(2); // **
put_string("</strong>");
}
else
{
put_string("<em>");
while (peek() != '*' && peek() != '\n' && peek() != 0)
{
put_codepoint(peek());
pop(1);
}
pop(1); // *
put_string("</em>");
}
}
else if (peek() == '[')
{
pop(1); // [
utf8_int32_t text[256];
size_t text_len = 0;
while (peek() != ']')
{
utf8_int32_t codepoint = peek();
text[text_len++] = codepoint;
pop(1);
}
pop(2); // ](
utf8_int32_t link[256];
size_t link_len = 0;
while (peek() != ')')
{
utf8_int32_t codepoint = peek();
link[link_len++] = codepoint;
pop(1);
}
pop(1); // )
put_string("<a href=\"");
for (size_t i = 0; i < link_len; i++)
{
put_codepoint(link[i]);
}
put_string("\">");
for (size_t i = 0; i < text_len; i++)
{
put_codepoint(text[i]);
}
put_string("</a>");
}
else
{
put_codepoint(peek());
pop(1);
}
}
}
/*
 * Converts a heading line: a run of '#' followed by inline text.
 *
 * Emits <hN>text</hN> and a newline, where N is the number of '#'
 * characters. HTML only defines h1..h6, so N is capped at 6; without the cap
 * a long run of '#' would produce a tag name that is not a heading (and,
 * past nine, not even a digit). All '#' characters are consumed either way.
 */
void parse_heading()
{
    enum { MAX_HEADING_LEVEL = 6 };
    int level = 0;

    while (peek() == '#') {
        if (level < MAX_HEADING_LEVEL) {
            level++;
        }
        pop(1); // #
    }

    put_string("<h");
    put_codepoint('0' + level);
    put_string(">");
    skip_whitespace();
    parse_text();
    put_string("</h");
    put_codepoint('0' + level);
    put_string(">\n");
}
void parse_unordered_list()
{
put_string("<ul>\n");
while (peek() == '*')
{
pop(1); // *
skip_whitespace();
put_string("<li>");
parse_text();
put_string("</li>\n");
pop(1); // \n
}
put_string("</ul>\n");
}
void parse_ordered_list()
{
put_string("<ol>\n");
int index = 1;
while (peek() == '0' + index)
{
pop(2); // 1.
skip_whitespace();
put_string("<li>");
parse_text();
put_string("</li>\n");
pop(1); // \n
index++;
}
put_string("</ol>\n");
}
void parse_code_block()
{
pop(3); // ```
put_string("<pre><code>");
while (peek() != '`' && peek() != 0)
{
put_codepoint(peek());
pop(1);
}
put_string("</code></pre>\n");
pop(3); // ```
}
/*
 * Converts one "> text" line into <blockquote>text</blockquote> followed by
 * a newline. The leading marker and any spaces after it are dropped.
 */
void parse_blockquote()
{
    /* Consuming input and producing output are independent, so the marker
     * is stripped first and the element is written afterwards. */
    pop(1); // >
    skip_whitespace();

    put_string("<blockquote>");
    parse_text();
    put_string("</blockquote>\n");
}
void parse_horizontal_rule()
{
pop(3); // ---
put_string("<hr>\n");
}
/*
 * Fallback block parser: wraps the inline content of the current line in
 * <p>...</p> and ends it with a newline.
 */
void parse_paragraph()
{
    put_string("<p>");   /* opening tag   */
    parse_text();        /* line contents */
    put_string("</p>\n"); /* closing tag   */
}
void load(const char *filename)
{
FILE *file = fopen(filename, "r");
if (!file)
{
perror("Error opening file");
exit(EXIT_FAILURE);
}
fseek(file, 0, SEEK_END);
long size = ftell(file);
rewind(file);
ib = (utf8_int8_t *)malloc(size + 1);
if (!ib)
{
fprintf(stderr, "Memory allocation failed\n");
fclose(file);
exit(EXIT_FAILURE);
}
fread(ib, 1, size, file);
ib[size] = '\0';
fclose(file);
it = ib;
ob = (utf8_int8_t *)malloc(cap);
if (!ob)
{
fprintf(stderr, "Memory allocation failed\n");
exit(EXIT_FAILURE);
}
}
/*
 * Writes the accumulated output (the first len bytes of ob) to `filename`,
 * replacing any existing file.
 *
 * Failure to open, write or close the file prints a message to stderr and
 * terminates the program with EXIT_FAILURE, so a truncated or missing
 * output file is never reported as success.
 */
void save(const char *filename)
{
    FILE *file = fopen(filename, "w");
    if (!file)
    {
        perror("Error opening file");
        exit(EXIT_FAILURE);
    }

    /* Skip the call entirely for empty output: ob may not hold any data. */
    if (len > 0 && fwrite(ob, 1, len, file) != len)
    {
        perror("Error writing file");
        fclose(file);
        exit(EXIT_FAILURE);
    }

    /* fclose flushes buffered data, so its result matters for written files. */
    if (fclose(file) != 0)
    {
        perror("Error closing file");
        exit(EXIT_FAILURE);
    }
}
/*
 * Returns the code point at the read cursor without moving the cursor.
 * The result is 0 when utf8codepoint() stores no other value, which the
 * callers in this file treat as the end of the input.
 */
utf8_int32_t peek()
{
    utf8_int32_t decoded = 0;

    /* The returned "next" pointer is deliberately ignored: peeking must not
     * advance `it`. */
    (void)utf8codepoint(it, &decoded);
    return decoded;
}
/*
 * Advances the read cursor by up to n code points.
 *
 * The cursor never moves past the NUL terminator of the input buffer:
 * once it is reached, the remaining steps are skipped. Callers that pop a
 * fixed number of characters near the end of the input therefore stop at
 * the end instead of reading memory beyond the buffer.
 * A non-positive n leaves the cursor unchanged.
 */
void pop(int n)
{
    for (int i = 0; i < n && *it != '\0'; i++)
    {
        utf8_int32_t codepoint = 0;
        it = utf8codepoint(it, &codepoint);
    }
}
/*
 * Moves the read cursor past a run of space characters. Only ' ' is
 * skipped; tabs and newlines are left in place.
 */
void skip_whitespace()
{
    for (; peek() == ' '; pop(1)) {
        /* nothing to do: the loop header consumes the spaces */
    }
}
/*
 * Appends `codepoint` to the output buffer.
 *
 * The code point is first encoded into a small zero-filled scratch array by
 * utf8catcodepoint() (from utf8.h); every non-zero byte of that array is
 * then copied to ob. The buffer is doubled (starting at 8 bytes) whenever
 * fewer than two free bytes remain. If the buffer cannot be grown, a message
 * is printed to stderr and the program exits with EXIT_FAILURE.
 */
void put_codepoint(utf8_int32_t codepoint)
{
    utf8_int8_t bytes[5] = {0};

    utf8catcodepoint(bytes, codepoint, sizeof(bytes));

    /* The array is zero-initialised, so the first zero byte marks the end of
     * the encoded sequence. */
    for (size_t i = 0; bytes[i] != 0; i++) {
        if (len + 1 >= cap) {
            size_t grown = (cap != 0) ? cap * 2 : 8;
            utf8_int8_t *resized = (utf8_int8_t *)realloc(ob, grown);

            if (!resized) {
                fprintf(stderr, "Memory allocation failed\n");
                exit(EXIT_FAILURE);
            }
            cap = grown;
            ob = resized;
        }
        ob[len] = bytes[i];
        len += 1;
    }
}
/*
 * Appends a NUL-terminated string to the output buffer.
 *
 * Each char of `str` is handed to put_codepoint() as a code point of its
 * own, so the string is not decoded as UTF-8 here; every call in this file
 * passes an ASCII literal.
 */
void put_string(const char *str)
{
    for (const char *cursor = str; *cursor != '\0'; ++cursor) {
        put_codepoint(*cursor);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment