Skip to content

Instantly share code, notes, and snippets.

@Rhomboid
Created April 2, 2016 17:02
Show Gist options
  • Save Rhomboid/c8a1f52225a5be656250a3b6903d434f to your computer and use it in GitHub Desktop.
Save Rhomboid/c8a1f52225a5be656250a3b6903d434f to your computer and use it in GitHub Desktop.
string tokenization in C with dynamic memory
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/**
 * Split a string into newly allocated tokens.
 *
 * Runs of characters from `delim` separate tokens; leading, trailing and
 * repeated delimiters never produce empty tokens. The input string is not
 * modified.
 *
 * @param s          NUL-terminated string to split (may be NULL).
 * @param delim      NUL-terminated set of delimiter characters (may be NULL).
 * @param tokenarray Out parameter. On success receives a heap-allocated array
 *                   of heap-allocated, NUL-terminated token copies. The caller
 *                   owns it: free each of the returned tokens, then the array
 *                   itself. On failure it receives NULL.
 * @return Number of tokens stored in *tokenarray. Returns 0 when there are no
 *         tokens, when an argument is NULL, or when an allocation fails; in
 *         the latter two cases *tokenarray is NULL, so free(*tokenarray)
 *         remains safe for the caller.
 */
size_t tokenize(const char *s, const char *delim, char ***tokenarray)
{
    size_t size = 0, capacity = 4;
    char **tokens;

    if (!tokenarray)
        return 0;
    *tokenarray = NULL;
    if (!s || !delim)
        return 0;

    tokens = malloc(capacity * sizeof *tokens);
    if (!tokens)
        return 0;

    for (const char *begin = s; begin; ) {
        /* Skip any delimiters in front of the next token. */
        begin += strspn(begin, delim);
        if (!*begin)
            break;

        /* end is NULL when the token runs to the end of the string. */
        const char *end = strpbrk(begin, delim);
        size_t wordlen = end ? (size_t)(end - begin) : strlen(begin);

        if (size == capacity) {
            /* Grow by 1.5x; written as c + c/2 so the intermediate value
             * cannot overflow before the division. */
            size_t newcap = capacity + capacity / 2;
            if (newcap > (size_t)-1 / sizeof *tokens)
                goto fail;
            /* Keep the old pointer until realloc succeeds so it can still
             * be released on failure. */
            char **grown = realloc(tokens, newcap * sizeof *tokens);
            if (!grown)
                goto fail;
            tokens = grown;
            capacity = newcap;
        }

        char *word = malloc(wordlen + 1);
        if (!word)
            goto fail;
        memcpy(word, begin, wordlen);
        word[wordlen] = '\0';
        tokens[size++] = word;

        begin = end;
    }

    *tokenarray = tokens;
    return size;

fail:
    /* Release everything built so far; *tokenarray is already NULL. */
    while (size > 0)
        free(tokens[--size]);
    free(tokens);
    return 0;
}
/* Demo driver: tokenizes a fixed sentence, prints each token, then releases
 * every token and the array that held them. */
int main(void)
{
    const char *sentence = "The quick brown fox jumps over the lazy dog.\n";
    char **words;
    size_t count = tokenize(sentence, " .\n", &words);
    size_t idx;

    /* First pass: report every token. */
    idx = 0;
    while (idx < count) {
        printf("got <%s>\n", words[idx]);
        ++idx;
    }

    /* Second pass: free the token copies, then the array itself. */
    idx = 0;
    while (idx < count) {
        free(words[idx]);
        ++idx;
    }
    free(words);

    return 0;
}
Copy link

ghost commented Apr 4, 2016

Thanks for this! Great modular function.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment