Skip to content

Instantly share code, notes, and snippets.

@DavideGalilei
Last active June 16, 2022 14:04
Show Gist options
  • Save DavideGalilei/147bf298122e825225829db09f3357a0 to your computer and use it in GitHub Desktop.
Save DavideGalilei/147bf298122e825225829db09f3357a0 to your computer and use it in GitHub Desktop.
Mini XML parser in C (EOF is not handled correctly)
#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <stdlib.h>
#include "seq.c"
/* One parsed XML element; nodes form a tree through `children`. */
typedef struct XmlNode XmlNode;
/* Sequence of XmlNode pointers built with the Seq() macro from seq.c. */
typedef Seq(XmlNode*) XmlNodes;
struct XmlNode {
/* Tag name: the characters found between '<' and '>'; NULL when the parser found no element. */
char *name;
/* Text collected inside the element, NUL-terminated; NULL when the parser found no element. */
char *text;
/* Child elements in document order; the parser allocates this container for every node. */
XmlNodes *children;
};
/*
 * Look at the next character of `stream` without consuming it.
 *
 * The character is read with fgetc() and immediately handed back with
 * ungetc(). Returns the value fgetc() produced, which is EOF when nothing
 * more can be read.
 */
int fpeek(FILE *stream) {
    const int next = fgetc(stream);

    ungetc(next, stream);
    return next;
}
#define INDENTATION 4
/*
 * Print `node` and its whole subtree to stdout as indented pseudo-XML.
 *
 * node  - element to print; NULL prints nothing.
 * depth - nesting level of `node`; each level indents by INDENTATION columns.
 *
 * Output per element: the opening tag, the text (when present) on its own
 * line one level deeper, every child recursively, then the closing tag.
 */
void Xml_debug_(XmlNode *node, int depth) {
    if (node == NULL) {
        return;
    }

    const int pad = depth * INDENTATION;

    /* Top-level elements start at column 0, so no padding is emitted for them. */
    if (depth != 0) {
        printf("%*c", pad, ' ');
    }
    printf("<%s>\n", node->name);

    if (node->text != NULL) {
        printf("%*c%s\n", (depth + 1) * INDENTATION, ' ', node->text);
    }

    if (node->children != NULL) {
        size_t idx = 0;
        while (idx < node->children->len) {
            Xml_debug_(node->children->data[idx], depth + 1);
            idx++;
        }
    }

    if (depth != 0) {
        printf("%*c", pad, ' ');
    }
    printf("</%s>\n", node->name);
}
/* Print `node` and its subtree to stdout, starting at nesting depth 0. */
void Xml_debug(XmlNode *node) {
Xml_debug_(node, 0);
}
/*
 * Release a node tree produced by Xml_parse_file().
 *
 * Frees the name, the text, every child (recursively), the storage of the
 * children sequence, the children container and finally the node itself.
 * The node and all of its members must therefore be heap allocations, as
 * they are when the node comes from Xml_parse_file(). `node` must not be
 * used after this call. Passing NULL is a no-op.
 */
void Xml_free(XmlNode *node) {
    if (node == NULL) return;

    /* free(NULL) is a no-op, so the members need no individual NULL checks. */
    free(node->name);
    free(node->text);

    if (node->children != NULL) {
        for (size_t i = 0; i < node->children->len; i++) {
            Xml_free(node->children->data[i]);
        }
        Seq_destroy(*node->children);
        /* The container is a separate allocation from its element storage. */
        free(node->children);
    }

    free(node);
}
XmlNode *Xml_parse_file(FILE *stream) {
XmlNode *result = calloc(1, sizeof(*result));
result->children = calloc(1, sizeof(*result->children));
char c;
while ((c = fgetc(stream)) != EOF) {
// printf("%c", c);
// fflush(stdout);
switch (c) {
case ' ':
case '\t':
case '\r':
case '\n': {
continue;
}
case '<': {
size_t namelen = 0;
while ((c = fgetc(stream)) != '>') namelen++;
result->name = malloc(namelen + 1);
fseek(stream, -(namelen + 1), SEEK_CUR);
for (size_t i = 0; i < namelen; i++) result->name[i] = fgetc(stream);
result->name[namelen] = '\0';
// printf("Name: %s\n", result->name);
fseek(stream, 1, SEEK_CUR); // Skip ">"
Seq(char) text = {0};
while (1) {
while ((c = fgetc(stream)) != '<') {
if (text.len == 0 && (c == ' ' || c == '\n' || c == '\r' || c == '\t')) continue;
Seq_add(text, c);
};
if (fpeek(stream) == '/') {
while ((c = fgetc(stream)) != '>');
break;
}
else {
fseek(stream, -1, SEEK_CUR);
XmlNode *got = Xml_parse_file(stream);
// printf("\nNode debug: "); Xml_debug(got);
Seq_add((*result->children), got);
// printf("Seq len: %ld\n", result->children->len);
}
}
result->text = malloc(text.len + 1);
for (size_t i = 0; i < text.len; i++) result->text[i] = text.data[i];
result->text[text.len] = '\0';
Seq_destroy(text);
return result;
}
default: {
printf("Unreachable: '%c'!\n", c);
abort();
}
}
}
// printf("Name test: %s | Children: %ld\n", result->name, result->children->len);
return result;
}
/*
 * Parse every top-level element of "test.xml" and print each one with
 * Xml_debug(). Returns EXIT_FAILURE when the file cannot be opened,
 * EXIT_SUCCESS otherwise.
 */
int main(void) {
    FILE *file = fopen("test.xml", "r");
    if (!file) {
        printf("Could not open file: %s\n", strerror(errno));
        return (EXIT_FAILURE);
    }

    while (1) {
        XmlNode *node = Xml_parse_file(file);
        /* A node without a name means the parser found no further element. */
        if (node->name == NULL) {
            Xml_free(node);
            break;
        }
        Xml_debug(node);
        Xml_free(node);
    }

    fclose(file);
    return (EXIT_SUCCESS);
}
# Build main.c with debug symbols (-g) and strict warnings, then start it under gdb.
debug:
gcc -o main --std=c99 -Wall -Wextra -Werror -pedantic -g main.c && gdb ./main
# Build main.c with strict warnings and run the resulting binary.
run:
gcc -o main --std=c99 -Wall -Wextra -Werror -pedantic main.c && ./main
#ifndef _SEQ_INCLUDED
#define _SEQ_INCLUDED
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <inttypes.h>
/*
 * Seq(T): anonymous struct type describing a growable sequence of T.
 *   data - element storage (NULL while nothing has been allocated)
 *   temp - scratch slot; Seq_add() stores the new element here so that its
 *          address can be passed to _Seq_add()
 *   size - allocated capacity in bytes
 *   len  - number of elements currently stored
 * Initialise a sequence with `= {0}`.
 */
#define Seq(T) struct {T *data; T temp; size_t size; size_t len;}
/* Number of elements by which the storage grows (and shrinks) at a time. */
#ifndef ARENA_SIZE
#define ARENA_SIZE 1024
#endif
/*
 * Convenience wrappers around _Seq_add / _Seq_pop / _Seq_destroy.
 * `seq` must be an lvalue of a Seq(T) type and is evaluated several times,
 * so it must not have side effects. Every argument is parenthesized so that
 * expressions such as `*ptr` can be passed directly.
 */
#define Seq_add(seq, element) _Seq_add((void**)(&(seq).data), &(seq).size, &(seq).len, sizeof((seq).data[0]), ((seq).temp = (element), &(seq).temp))
#define Seq_pop(seq, index) _Seq_pop((void**)(&(seq).data), &(seq).size, &(seq).len, sizeof((seq).data[0]), (index))
#define Seq_destroy(seq) _Seq_destroy((seq).data)
/*
 * Append one element to a sequence, growing the storage when it is full.
 *
 * data      - address of the storage pointer (may point to NULL when empty)
 * data_size - address of the capacity, in bytes
 * len       - address of the element count
 * elem_size - size of one element, in bytes
 * element   - address of the element to copy in
 *
 * The storage grows by ARENA_SIZE elements at a time. Returns 0 on success.
 * Returns -1 when the storage cannot be grown (allocation failure or size
 * overflow); in that case *data, *data_size and *len are left untouched, so
 * the sequence stays valid.
 */
int _Seq_add(void **data, size_t *data_size, size_t *len, size_t elem_size, void *element) {
    /* (*len + 1) * elem_size must not wrap around. */
    if (elem_size != 0 && (*len) >= SIZE_MAX / elem_size) return -1;

    if (((*len) + 1) * elem_size > (*data_size)) {
        if (elem_size > SIZE_MAX / (size_t)(ARENA_SIZE)) return -1;
        size_t growth = (size_t)(ARENA_SIZE) * elem_size;
        if ((*data_size) > SIZE_MAX - growth) return -1;

        /* Keep the old pointer until realloc succeeds, so a failure loses nothing. */
        void *grown = realloc(*data, (*data_size) + growth);
        if (grown == NULL) return -1;
        *data = grown;
        *data_size += growth;
    }

    memcpy((char *)(*data) + (*len) * elem_size, element, elem_size);
    (*len)++;
    return 0;
}
/*
 * Remove the element at `index`, shifting the following elements down by one.
 *
 * data      - address of the storage pointer
 * data_size - address of the capacity, in bytes
 * len       - address of the element count
 * elem_size - size of one element, in bytes
 * index     - position of the element to remove
 *
 * Returns -1 when `index` is out of range (nothing is changed), 0 otherwise.
 * After the removal the storage is shrunk by one arena (ARENA_SIZE elements)
 * when more than a whole arena is unused. It is never shrunk to zero bytes,
 * and a failed shrink simply keeps the larger buffer.
 */
int _Seq_pop(void **data, size_t *data_size, size_t *len, size_t elem_size, size_t index) {
    if (index >= (*len)) return -1;

    /* Only the elements after `index` move; there are len - index - 1 of them. */
    size_t tail = (*len) - index - 1;
    if (tail > 0) {
        char *slot = (char *)(*data) + index * elem_size;
        memmove(slot, slot + elem_size, tail * elem_size);
    }
    (*len)--;

    if (elem_size != 0 && elem_size <= SIZE_MAX / (size_t)(ARENA_SIZE)) {
        size_t arena_bytes = (size_t)(ARENA_SIZE) * elem_size;
        size_t used_bytes = (*len) * elem_size;
        /* Strict comparison: the new size always stays above used_bytes, hence above zero. */
        if ((*data_size) > arena_bytes && (*data_size) - arena_bytes > used_bytes) {
            void *shrunk = realloc(*data, (*data_size) - arena_bytes);
            if (shrunk != NULL) {
                *data = shrunk;
                *data_size -= arena_bytes;
            }
        }
    }
    return 0;
}
/*
 * Release the element storage of a sequence by passing `data` to free().
 * Only the storage is freed; the sequence's size and len fields are not reset.
 */
void _Seq_destroy(void *data) {
free(data);
}
/* int main(void) {
Seq(int) numbers = {0};
for (size_t i = 0; i < 20; i++) {
Seq_add(numbers, i);
printf("Added %d (%d)...\n", i, numbers.data[numbers.len - 1]);
if (i % 2) Seq_pop(numbers, numbers.len - 1);
}
for (size_t i = 0; i < numbers.len; i++) {
printf("numbers[%d] = %d (Arena: %d)\n", i, numbers.data[i], numbers.size);
}
} */
#endif
<note>
<to>
<user>Jeff</user>
</to>
<from>Jani</from>
<heading>Reminder</heading>
<body>Don't forget me this weekend!</body>
</note>
<note>
<to>
<user>Dani</user>
</to>
<from>Jeff</from>
<heading>Hello</heading>
<body>World</body>
</note>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment