Last active
February 14, 2025 08:32
-
-
Save federicoops/79ae58130be90eced823ffe22dc18230 to your computer and use it in GitHub Desktop.
Simple C++ string tokenizer
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Splits `input` on every occurrence of `delimiter` and appends the pieces
 * to `tokens`.
 *
 * - Existing contents of `tokens` are preserved; new tokens are appended.
 * - Every delimiter terminates a token, so a leading delimiter or two
 *   adjacent delimiters produce an empty token ("a,,b" -> "a", "", "b").
 * - Text after the last delimiter is appended only when it is non-empty, so
 *   a trailing delimiter does not produce a trailing empty token
 *   ("a,b," -> "a", "b").
 *
 * @param input     String to split.
 * @param tokens    Output vector the tokens are appended to.
 * @param delimiter Character that separates tokens.
 */
void tokenizer(const std::string &input, std::vector<std::string> &tokens, const char delimiter) {
    std::string::size_type tokenStart = 0;

    // Each delimiter found closes the token that began at tokenStart.
    for (std::string::size_type delimiterPos = input.find(delimiter, tokenStart);
         delimiterPos != std::string::npos;
         delimiterPos = input.find(delimiter, tokenStart)) {
        // Substring constructor: builds the token in place inside the vector.
        tokens.emplace_back(input, tokenStart, delimiterPos - tokenStart);
        tokenStart = delimiterPos + 1;
    }

    // Remainder after the last delimiter; skipped when empty.
    if (tokenStart < input.size()) {
        tokens.emplace_back(input, tokenStart, std::string::npos);
    }
}
/**
 * Splits the NUL-terminated C string `str` on every occurrence of `sep` and
 * returns the pieces as freshly allocated, NUL-terminated C strings.
 *
 * - Every separator terminates a token, so a leading separator or two
 *   adjacent separators produce an empty token ("a,,b" -> "a", "", "b").
 * - Text after the last separator becomes a token only when it is non-empty
 *   ("a,b," -> "a", "b").
 * - A null `str` is treated like an empty string.
 *
 * Ownership: the returned array is allocated with `new[]`, as is each token.
 * The caller must `delete[]` every `result[i]` for `i < token_count` and
 * then `delete[]` the array itself.
 *
 * @param str         Input string; may be null.
 * @param sep         Character that separates tokens.
 * @param token_count Set to the number of tokens stored in the returned array.
 * @return Array of `token_count` NUL-terminated strings (never null).
 */
char** tok(const char* str, const char sep, int& token_count) {
    token_count = 0;

    const char* text = (str != nullptr) ? str : "";
    // Measure once instead of calling strlen on every loop iteration.
    const size_t length = strlen(text);

    // Pass 1: count tokens so the array is sized exactly. Counting only
    // separators would undercount by one whenever the input does not end
    // with a separator, and the final token would land past the array end.
    size_t total = 0;
    size_t tail_start = 0;
    for (size_t i = 0; i < length; ++i) {
        if (text[i] == sep) {
            ++total;
            tail_start = i + 1;
        }
    }
    if (tail_start < length) {
        ++total;
    }

    char** tokens = new char*[total];

    // Copies text[first, last) into a new buffer with room for a terminator.
    auto copy_range = [text](size_t first, size_t last) {
        const size_t token_length = last - first;
        char* copy = new char[token_length + 1];
        memcpy(copy, text + first, token_length);
        copy[token_length] = '\0';
        return copy;
    };

    // Pass 2: extract the tokens.
    size_t produced = 0;
    size_t token_start = 0;
    for (size_t i = 0; i < length; ++i) {
        if (text[i] != sep) {
            continue;
        }
        tokens[produced++] = copy_range(token_start, i);
        token_start = i + 1;
    }
    if (token_start < length) {
        tokens[produced++] = copy_range(token_start, length);
    }

    token_count = static_cast<int>(produced);
    return tokens;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment