Skip to content

Instantly share code, notes, and snippets.

@Calmarius
Created August 5, 2014 23:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Calmarius/d80e6ff50b505dc902cb to your computer and use it in GitHub Desktop.
Save Calmarius/d80e6ff50b505dc902cb to your computer and use it in GitHub Desktop.
Quoted string tokenizer function
#include <stdio.h>
#include <assert.h>
#include <string.h>
/**
* Extracts the next token from a string, with support for quoted tokens.
*
* str [in]: pointer to a zero terminated string. Required.
* token [out]: pointer to a user supplied buffer that receives the zero terminated token.
*     No bounds checking is done: the buffer must be large enough for the token.
*     A buffer of strlen(str) + 1 bytes is always sufficient, because every
*     character stored in the token is copied from one character of the input.
*     Required.
* delimiter [in]: the character that separates tokens (eg. space). Must not be '\0'.
* quote [in]: the character that opens and closes a quoted token, which may
*     contain delimiters (eg. "). Must not be '\0'.
* escape [in]: the character that, inside a quoted token, makes a following
*     quote or escape character literal (eg. \). May be the same character as
*     quote, which gives the "doubled quote" convention. Must not be '\0'.
*
* Rules:
* - Leading delimiters are skipped.
* - A token that starts with the quote character runs until the closing quote;
*   the surrounding quotes are not copied. If the closing quote is missing,
*   the token runs until the end of the string.
* - Inside a quoted token, an escape followed by any other character is copied
*   as an ordinary character.
* - A token that does not start with a quote runs until the next delimiter or
*   the end of the string; quote and escape have no special meaning in it.
*
* Returns a pointer to a location inside the string where the search for the next token can be started.
* Returns NULL if there are no more tokens; in that case token is set to the empty string.
*/
const char *qtokenize(const char *str, char *token, char delimiter, char quote, char escape)
{
    assert(delimiter != 0);
    assert(quote != 0);
    assert(escape != 0);
    assert(str);
    assert(token);

    /* Skip leading delimiters. */
    while (*str == delimiter)
    {
        str++;
    }

    if (*str == '\0')
    {
        /* Nothing left. Hand back an empty token so the caller never sees stale data. */
        *token = '\0';
        return NULL;
    }

    if (*str != quote)
    {
        /* Regular token. Read till delimiter or end of string. */
        while ((*str != '\0') && (*str != delimiter))
        {
            *token++ = *str++;
        }
        *token = '\0';
        return str;
    }

    /* Quoted token. */
    str++; /* Skip the opening quote. */
    while (*str != '\0')
    {
        /*
         * Escape sequence: only an escaped quote or an escaped escape counts.
         * Reading str[1] is safe here because *str is not the terminator.
         * This test must come before the closing quote test so that a doubled
         * quote works when escape and quote are the same character.
         */
        if ((*str == escape) && ((str[1] == quote) || (str[1] == escape)))
        {
            *token++ = str[1];
            str += 2;
            continue;
        }

        if (*str == quote)
        {
            /* Closing quote. Step over it and stop. */
            str++;
            break;
        }

        /* Regular char (including a lone escape), add it to the output. */
        *token++ = *str++;
    }

    /* Close the token. */
    *token = '\0';
    return str;
}
/*
 * Prints "<label>: " on its own line, then every token qtokenize() finds in
 * input, one "Token is '...'" line per token.
 */
static void print_tokens(const char *label, const char *input, char delimiter, char quote, char escape)
{
    char token[200];

    /* Poison the buffer so a missing terminator shows up as garbage in the output. */
    memset(token, 0xCC, sizeof(token));

    printf("%s: \n", label);
    while ((input = qtokenize(input, token, delimiter, quote, escape)) != NULL)
    {
        printf("Token is '%s'\n", token);
    }
}

/* Demo driver: runs the tokenizer over a few representative inputs. */
int main(void)
{
    /* Backslash escaped quote inside a quoted token. */
    print_tokens("Example 1", "foo bar \"baz baz\" \"Stuff that contains \\\". \"", ' ', '"', '\\');

    /* Plain tokens only. */
    print_tokens("Example 2", "foo bar", ' ', '"', '\\');

    /* Quoted token without a closing quote. */
    print_tokens("Invalid quoted", "foo bar \"This is an unfinished quot", ' ', '"', '\\');

    /* Escape and quote are the same character: a doubled quote is a literal quote. */
    print_tokens("Double quoted",
                 "foo bar \"In this string a doubled quotes escape a single quote like this: \"\"quote\"\" \"",
                 ' ', '"', '"');

    /* No tokens at all. */
    print_tokens("Empty string", "", ' ', '"', '\\');

    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment