Skip to content

Instantly share code, notes, and snippets.

@afiedler
Created November 4, 2010 17:12
Show Gist options
  • Save afiedler/662786 to your computer and use it in GitHub Desktop.
Save afiedler/662786 to your computer and use it in GitHub Desktop.
C string tokenizer that returns empty strings for adjacent delimiters
/* TOKENIZE -- demonstrates a way to tokenize a string, with adjacent delimiters
* returned as empty strings.
* Partially based on: http://stackoverflow.com/questions/874161
*/
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
/* Function declarations */
/* Splits text on every occurrence of delim and stores a malloc'd array of
 * malloc'd, NUL-terminated token strings in *output. Adjacent, leading and
 * trailing delimiters produce empty-string tokens. *output must be NULL on
 * entry; otherwise nothing is allocated and -1 (converted to unsigned int) is
 * returned. On success the return value is the number of tokens stored. */
unsigned int tokenize(const char* text, char delim, char*** output);
/* Frees the first itemCount strings held in *store, then the array itself,
 * and sets *store to NULL. */
void reclaim2D(char ***store, unsigned int itemCount);
/* Tokenizer demo: splits a fixed set of sample strings on ',' and prints every
 * token, showing that leading, trailing and adjacent delimiters come back as
 * empty strings.
 *
 * Returns 0 on success, or EXIT_FAILURE if a sample produced no token array. */
int main(void) {
    /* Samples cover: plain fields, a trailing delimiter, a lone delimiter,
     * the empty string, runs of delimiters, and delimiters at both ends. */
    static const char *const samples[] = {
        "ABC,DE,F",
        "AB,C,DEF,G,",
        ",",
        "",
        ",,A,,",
        ",ABC,",
    };
    const unsigned int nsamples =
        (unsigned int)(sizeof samples / sizeof samples[0]);
    unsigned int s, i, c;
    char **tokens = NULL;

    for (s = 0; s < nsamples; s++) {
        printf("string%u: '%s'\n", s + 1, samples[s]);

        c = tokenize(samples[s], ',', &tokens);
        if (tokens == NULL) {
            /* Without a token array there is nothing safe to index. */
            fprintf(stderr, "tokenize produced no tokens for string%u\n", s + 1);
            return EXIT_FAILURE;
        }

        for (i = 0; i < c; i++) {
            /* %u matches the unsigned counter (the old %d did not). */
            printf("\ttoken %u: '%s'\n", i + 1, tokens[i]);
        }

        /* Releases the tokens and resets the pointer to NULL, which tokenize
         * requires of its output argument on the next iteration. */
        reclaim2D(&tokens, c);
    }
    return 0;
}
/* Returns a freshly malloc'd, NUL-terminated copy of the len bytes starting at
 * start, or NULL if the allocation fails. len may be 0 (empty token). */
static char *copy_span(const char *start, size_t len)
{
    char *copy = malloc(len + 1);

    if (copy == NULL) {
        return NULL;
    }
    if (len > 0) {
        memcpy(copy, start, len);
    }
    copy[len] = '\0';
    return copy;
}

/* Splits text on every occurrence of delim. Adjacent, leading and trailing
 * delimiters yield empty-string tokens, so the token count is always the
 * number of delimiters plus one (an empty text gives one empty token).
 *
 * Parameters:
 *   text   - NUL-terminated string to split; it is not modified.
 *   delim  - delimiter character.
 *   output - address of a char** that must be NULL on entry. On success it
 *            receives a malloc'd array of malloc'd token strings; the caller
 *            owns both the array and every string in it.
 *
 * Returns:
 *   the number of tokens (>= 1) on success;
 *   (unsigned int)-1 if *output was not NULL on entry (nothing is touched);
 *   0 if text or output is NULL, the token count does not fit the return
 *     type, or an allocation fails. In these cases *output is left NULL and
 *     nothing is leaked. */
unsigned int tokenize(const char* text, char delim, char*** output) {
    size_t ntokens = 1;   /* one more token than there are delimiters */
    size_t filled = 0;    /* tokens stored so far; doubles as the write index */
    size_t start = 0;     /* offset of the first character of the current token */
    size_t i;
    char **tokens;

    if (output == NULL || text == NULL) {
        return 0;
    }
    if (*output != NULL) {
        return (unsigned int)-1; /* I will allocate my own storage */
    }

    /* First pass: count the delimiters to size the pointer array. */
    for (i = 0; text[i] != '\0'; i++) {
        if (text[i] == delim) {
            ntokens++;
        }
    }
    /* The count has to fit the return type without colliding with the
     * (unsigned int)-1 error value. */
    if (ntokens >= (unsigned int)-1) {
        return 0;
    }

    /* calloc checks the size multiplication for overflow. */
    tokens = calloc(ntokens, sizeof *tokens);
    if (tokens == NULL) {
        return 0;
    }

    /* Second pass: a token ends at each delimiter and at the terminator.
     * The terminator is tested first, so a delim of '\0' never splits. */
    for (i = 0; ; i++) {
        if (text[i] != '\0' && text[i] != delim) {
            continue;
        }
        tokens[filled] = copy_span(text + start, i - start);
        if (tokens[filled] == NULL) {
            goto fail;
        }
        filled++;
        if (text[i] == '\0') {
            break;
        }
        /* The next token starts right after the delimiter. */
        start = i + 1;
    }

    *output = tokens;
    return (unsigned int)ntokens;

fail:
    /* Release everything built so far; *output was never written. */
    while (filled > 0) {
        free(tokens[--filled]);
    }
    free(tokens);
    return 0;
}
/* Releases an array of heap-allocated strings.
 *
 * Parameters:
 *   store     - address of the array pointer. May be NULL, and *store may be
 *               NULL; in either case the call does nothing.
 *   itemCount - number of entries in *store to free. Entries that are NULL
 *               are skipped harmlessly, since free(NULL) is a no-op.
 *
 * On return *store is NULL, so the pointer cannot be freed twice or reused by
 * accident. */
void reclaim2D(char ***store, unsigned int itemCount)
{
    char **items;
    unsigned int x; /* unsigned, to match itemCount over its full range */

    /* Nothing to release; also avoids indexing a NULL array when a caller
     * passes a non-zero count alongside an empty store. */
    if (store == NULL || *store == NULL) {
        return;
    }

    items = *store;
    for (x = 0; x < itemCount; ++x) {
        free(items[x]);
    }
    free(items);
    *store = NULL;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment