Created
August 5, 2014 23:46
-
-
Save Calmarius/d80e6ff50b505dc902cb to your computer and use it in GitHub Desktop.
Quoted string tokenizer function
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <assert.h>
#include <stdio.h>
#include <string.h>
/**
 * Extracts the next token from a delimiter-separated string, with support
 * for quoted tokens.
 *
 * Leading delimiters are skipped first. What follows is read in one of two
 * ways:
 *
 *  - If it starts with the quote character, the token is everything up to
 *    the closing quote, delimiters included. Inside the quotes, an escape
 *    character followed by a quote or by another escape character produces
 *    that second character literally; an escape character followed by
 *    anything else is copied as an ordinary character. If the closing quote
 *    is missing, the token runs to the end of the string.
 *  - Otherwise the token is everything up to the next delimiter (or the end
 *    of the string). Quote and escape characters have no special meaning in
 *    an unquoted token.
 *
 * str [in]: pointer to a zero-terminated string. Required.
 * token [out]: pointer to a user-supplied buffer that receives the
 *     zero-terminated token. Required. No length is passed, so the caller
 *     must ensure the buffer is long enough; a token is never longer than the
 *     input it was read from, so strlen(str) + 1 bytes always suffice.
 * delimiter [in]: the character that delimits the tokens (e.g. space).
 *     Must not be '\0'.
 * quote [in]: the character that acts as a quote for tokens that contain
 *     delimiters (e.g. "). Must not be '\0'.
 * escape [in]: the character used to escape quotes and itself inside a
 *     quoted token (e.g. \). Must not be '\0'. It may be the same character
 *     as quote, in which case a doubled quote stands for one literal quote.
 *
 * Returns a pointer to the location inside str where the search for the next
 * token can be started.
 * Returns NULL if only delimiters (or nothing) remained, i.e. there is no
 * further token; the token buffer is not written in that case.
 */
const char *qtokenize(const char *str, char *token, char delimiter, char quote, char escape)
{
    assert(delimiter != 0);
    assert(quote != 0);
    assert(escape != 0);
    assert(str);
    assert(token);

    /* Skip starting delimiters. */
    while (*str == delimiter)
    {
        str++;
    }
    if (!*str)
    {
        return NULL;
    }

    if (*str == quote)
    {
        str++; /* Skip the opening quote. */
        while (*str)
        {
            /*
             * Escape sequence: escape followed by quote or escape.
             * Peeking at str[1] is safe because *str is non-zero here, so
             * str[1] is at worst the terminator. This test must come before
             * the closing-quote test so that escape == quote (doubled
             * quotes) works.
             */
            if (*str == escape && (str[1] == quote || str[1] == escape))
            {
                *token++ = str[1];
                str += 2;
                continue;
            }
            if (*str == quote)
            {
                /* Closing quote. Done; resume after it next time. */
                str++;
                break;
            }
            /* Regular char (including a lone escape), copy it. */
            *token++ = *str++;
        }
    }
    else
    {
        /* Regular token. Read till delimiter or end of string. */
        while (*str && *str != delimiter)
        {
            *token++ = *str++;
        }
    }

    /* Close the token. */
    *token = 0;
    return str;
}
/*
 * Prints a heading, then every token qtokenize() finds in input, one per
 * line. Tokens are split on spaces with '"' as the quote character; escape
 * is passed through as the escape character.
 */
static void print_tokens(const char *heading, const char *input, char escape)
{
    char token[200];

    /* Poison the buffer so a missing terminator shows up in the output. */
    memset(token, 0xCC, sizeof(token));

    fputs(heading, stdout);
    while ((input = qtokenize(input, token, ' ', '"', escape)) != NULL)
    {
        printf("Token is \'%s\'\n", token);
    }
}

/*
 * Demo driver: runs qtokenize() over a handful of sample strings covering
 * backslash escapes, plain words, an unterminated quote, doubled-quote
 * escaping and the empty string.
 */
int main(void)
{
    print_tokens("Example 1: \n",
                 "foo bar \"baz baz\" \"Stuff that contains \\\". \"", '\\');
    print_tokens("Example 2: \n", "foo bar", '\\');
    print_tokens("Invalid quoted: \n",
                 "foo bar \"This is an unfinished quot", '\\');
    /* Here the quote character doubles as the escape character. */
    print_tokens("Double quoted: \n",
                 "foo bar \"In this string a doubled quotes escape a single quote like this: \"\"quote\"\" \"",
                 '"');
    print_tokens("Empty string: \n", "", '\\');
    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment