mofosyne/strtok_escaped.c

## strtok_escaped.c
#include <stdio.h>
#include <string.h>

/**
 * Split a string into tokens on a delimiter, honouring backslash escapes.
 *
 * Used like strtok(): pass the string on the first call and NULL on the
 * following calls to keep walking the same string. Differences from strtok():
 *   - A backslash immediately followed by a delimiter character is an escape:
 *     the backslash is removed and the delimiter is kept as a literal
 *     character of the token. A backslash before anything else is untouched.
 *   - Adjacent delimiters yield an empty token ("a||b" -> "a", "", "b").
 *     An empty remainder at the end of the string yields NULL instead.
 *
 * The input buffer is modified in place (delimiters are overwritten with NUL,
 * escape backslashes are removed) and the scan position is kept in a static
 * variable, so the function is not reentrant.
 *
 * https://gist.github.com/mofosyne/81c94740c0f33259606afa823562914c
 *
 * @param str   Writable string to tokenise, or NULL to continue the previous one.
 * @param delim Set of delimiter characters (typically a single one, e.g. "|").
 * @return Pointer to the next token inside the buffer, or NULL when none remain.
 */
char *strtok_escaped(char *str, const char *delim) {
    // Start of the next token, or NULL once the current string is exhausted.
    static char *next_token = NULL;

    if (str != NULL) {
        next_token = str;
    }
    if (next_token == NULL) {
        return NULL;
    }

    char *token_start = next_token;
    char *token_end = token_start;

    while (*token_end != '\0') {
        // The NUL check keeps strchr() from matching the terminator of delim,
        // so a trailing backslash is never treated as an escape.
        if (*token_end == '\\' && token_end[1] != '\0' && strchr(delim, token_end[1]) != NULL) {
            // Escaped delimiter: drop the backslash by shifting the tail left.
            memmove(token_end, token_end + 1, strlen(token_end + 1) + 1);
            // token_end now sits on the literal delimiter; the single increment
            // below steps over it so the following character is still examined.
        } else if (strchr(delim, *token_end) != NULL) {
            // Unescaped delimiter: terminate the token and resume after it.
            *token_end = '\0';
            next_token = token_end + 1;
            return token_start;
        }
        token_end++;
    }

    if (*token_start == '\0') {
        // Nothing left; forget the position so later NULL calls return NULL
        // instead of resuming some previously tokenised string.
        next_token = NULL;
        return NULL;
    }

    // Final token: the next call starts on the terminator and reports the end.
    next_token = token_end;
    return token_start;
}

/**
 * Self-test driver for strtok_escaped().
 *
 * Each test case is tokenised on '|' from a writable copy of its input and the
 * resulting tokens are compared, in order, with the expected list. A case
 * fails on any token mismatch, on too few tokens, or on surplus tokens.
 *
 * @return 0 when every test case passes, 1 otherwise.
 */
int main(void) {
    // Slots available for expected tokens per test case. Unused slots are
    // zero-initialised, so they read as the NULL end marker.
    enum { MAX_TOKENS = 10 };

    typedef struct {
        const char *input;
        const char *expected_tokens[MAX_TOKENS];
    } Test;

    const char *delim = "|";
    Test tests[] = {
        {"apple|banana|cherry", {"apple", "banana", "cherry", NULL}},
        {"apple\\|banana|cherry", {"apple|banana", "cherry", NULL}},
        {"apple|banana|cherry\\", {"apple", "banana", "cherry\\", NULL}},
        {"apple\\|banana|cherry\\", {"apple|banana", "cherry\\", NULL}},
        {"apple|banana\\|cherry", {"apple", "banana|cherry", NULL}},
        {"apple\\|banana\\|cherry", {"apple|banana|cherry", NULL}},
        {"apple\\\\|banana|cherry", {"apple\\|banana", "cherry", NULL}},
        {"apple\\|banana\\|cherry\\", {"apple|banana|cherry\\", NULL}},
        {"apple|ba\\nana|cherry", {"apple", "ba\\nana", "cherry", NULL}},
        {"", {NULL}} // Empty string test case
    };

    int num_tests = (int)(sizeof(tests) / sizeof(tests[0]));
    int failed_tests = 0;

    for (int i = 0; i < num_tests; i++) {
        printf("\nTest Case %d: '%s'\n", i, tests[i].input);
        int failed_sub_tests = 0;

        // The tokeniser writes into its input, so work on a bounded copy
        // rather than on the string literal itself.
        char str[512];
        snprintf(str, sizeof str, "%s", tests[i].input);

        // Walk produced and expected tokens in lockstep.
        int token_index = 0;
        char *token = strtok_escaped(str, delim);
        while (token != NULL && token_index < MAX_TOKENS && tests[i].expected_tokens[token_index] != NULL) {
            if (strcmp(token, tests[i].expected_tokens[token_index]) != 0) {
                printf("Token Mismatch - got '%s' but expecting '%s' - failed\n", token, tests[i].expected_tokens[token_index]);
                failed_sub_tests++;
            } else {
                printf("Token: '%s' - ok\n", token);
            }

            token_index++;
            token = strtok_escaped(NULL, delim);
        }

        if (token_index < MAX_TOKENS && tests[i].expected_tokens[token_index] != NULL) {
            // The tokeniser ran out before the expected list did.
            printf("Incorrect number of tokens\n");
            failed_sub_tests++;
        } else if (token != NULL) {
            // The expected list ran out but the tokeniser still produced a token.
            printf("Incorrect number of tokens - unexpected extra token '%s'\n", token);
            failed_sub_tests++;
        }

        if (failed_sub_tests) {
            failed_tests++;
            printf("FAILED\n");
        } else {
            printf("PASSED\n");
        }
    }

    if (failed_tests > 0) {
        printf("\n%d test(s) failed.\n", failed_tests);
        return 1;
    }

    printf("\nAll tests passed.\n");
    return 0;
}
	#include <stdio.h>
	#include <string.h>

	/**
	 * Split a string into tokens on a delimiter, honouring backslash escapes.
	 *
	 * Used like strtok(): pass the string on the first call and NULL on the
	 * following calls to keep walking the same string. Differences from strtok():
	 *   - A backslash immediately followed by a delimiter character is an escape:
	 *     the backslash is removed and the delimiter is kept as a literal
	 *     character of the token. A backslash before anything else is untouched.
	 *   - Adjacent delimiters yield an empty token ("a||b" -> "a", "", "b").
	 *     An empty remainder at the end of the string yields NULL instead.
	 *
	 * The input buffer is modified in place (delimiters are overwritten with NUL,
	 * escape backslashes are removed) and the scan position is kept in a static
	 * variable, so the function is not reentrant.
	 *
	 * https://gist.github.com/mofosyne/81c94740c0f33259606afa823562914c
	 *
	 * @param str   Writable string to tokenise, or NULL to continue the previous one.
	 * @param delim Set of delimiter characters (typically a single one, e.g. "|").
	 * @return Pointer to the next token inside the buffer, or NULL when none remain.
	 */
	char *strtok_escaped(char *str, const char *delim) {
	    // Start of the next token, or NULL once the current string is exhausted.
	    static char *next_token = NULL;

	    if (str != NULL) {
	        next_token = str;
	    }
	    if (next_token == NULL) {
	        return NULL;
	    }

	    char *token_start = next_token;
	    char *token_end = token_start;

	    while (*token_end != '\0') {
	        // The NUL check keeps strchr() from matching the terminator of delim,
	        // so a trailing backslash is never treated as an escape.
	        if (*token_end == '\\' && token_end[1] != '\0' && strchr(delim, token_end[1]) != NULL) {
	            // Escaped delimiter: drop the backslash by shifting the tail left.
	            memmove(token_end, token_end + 1, strlen(token_end + 1) + 1);
	            // token_end now sits on the literal delimiter; the single increment
	            // below steps over it so the following character is still examined.
	        } else if (strchr(delim, *token_end) != NULL) {
	            // Unescaped delimiter: terminate the token and resume after it.
	            *token_end = '\0';
	            next_token = token_end + 1;
	            return token_start;
	        }
	        token_end++;
	    }

	    if (*token_start == '\0') {
	        // Nothing left; forget the position so later NULL calls return NULL
	        // instead of resuming some previously tokenised string.
	        next_token = NULL;
	        return NULL;
	    }

	    // Final token: the next call starts on the terminator and reports the end.
	    next_token = token_end;
	    return token_start;
	}

	/**
	 * Self-test driver for strtok_escaped().
	 *
	 * Each test case is tokenised on '|' from a writable copy of its input and the
	 * resulting tokens are compared, in order, with the expected list. A case
	 * fails on any token mismatch, on too few tokens, or on surplus tokens.
	 *
	 * @return 0 when every test case passes, 1 otherwise.
	 */
	int main(void) {
	    // Slots available for expected tokens per test case. Unused slots are
	    // zero-initialised, so they read as the NULL end marker.
	    enum { MAX_TOKENS = 10 };

	    typedef struct {
	        const char *input;
	        const char *expected_tokens[MAX_TOKENS];
	    } Test;

	    const char *delim = "|";
	    Test tests[] = {
	        {"apple|banana|cherry", {"apple", "banana", "cherry", NULL}},
	        {"apple\\|banana|cherry", {"apple|banana", "cherry", NULL}},
	        {"apple|banana|cherry\\", {"apple", "banana", "cherry\\", NULL}},
	        {"apple\\|banana|cherry\\", {"apple|banana", "cherry\\", NULL}},
	        {"apple|banana\\|cherry", {"apple", "banana|cherry", NULL}},
	        {"apple\\|banana\\|cherry", {"apple|banana|cherry", NULL}},
	        {"apple\\\\|banana|cherry", {"apple\\|banana", "cherry", NULL}},
	        {"apple\\|banana\\|cherry\\", {"apple|banana|cherry\\", NULL}},
	        {"apple|ba\\nana|cherry", {"apple", "ba\\nana", "cherry", NULL}},
	        {"", {NULL}} // Empty string test case
	    };

	    int num_tests = (int)(sizeof(tests) / sizeof(tests[0]));
	    int failed_tests = 0;

	    for (int i = 0; i < num_tests; i++) {
	        printf("\nTest Case %d: '%s'\n", i, tests[i].input);
	        int failed_sub_tests = 0;

	        // The tokeniser writes into its input, so work on a bounded copy
	        // rather than on the string literal itself.
	        char str[512];
	        snprintf(str, sizeof str, "%s", tests[i].input);

	        // Walk produced and expected tokens in lockstep.
	        int token_index = 0;
	        char *token = strtok_escaped(str, delim);
	        while (token != NULL && token_index < MAX_TOKENS && tests[i].expected_tokens[token_index] != NULL) {
	            if (strcmp(token, tests[i].expected_tokens[token_index]) != 0) {
	                printf("Token Mismatch - got '%s' but expecting '%s' - failed\n", token, tests[i].expected_tokens[token_index]);
	                failed_sub_tests++;
	            } else {
	                printf("Token: '%s' - ok\n", token);
	            }

	            token_index++;
	            token = strtok_escaped(NULL, delim);
	        }

	        if (token_index < MAX_TOKENS && tests[i].expected_tokens[token_index] != NULL) {
	            // The tokeniser ran out before the expected list did.
	            printf("Incorrect number of tokens\n");
	            failed_sub_tests++;
	        } else if (token != NULL) {
	            // The expected list ran out but the tokeniser still produced a token.
	            printf("Incorrect number of tokens - unexpected extra token '%s'\n", token);
	            failed_sub_tests++;
	        }

	        if (failed_sub_tests) {
	            failed_tests++;
	            printf("FAILED\n");
	        } else {
	            printf("PASSED\n");
	        }
	    }

	    if (failed_tests > 0) {
	        printf("\n%d test(s) failed.\n", failed_tests);
	        return 1;
	    }

	    printf("\nAll tests passed.\n");
	    return 0;
	}