Skip to content

Instantly share code, notes, and snippets.

@artemkin
Last active November 3, 2015 12:39
Show Gist options
  • Save artemkin/d2685305014b8559917e to your computer and use it in GitHub Desktop.
Save artemkin/d2685305014b8559917e to your computer and use it in GitHub Desktop.
Simple tokenizer
#include <iostream>
#include <string>
#include <functional>
#include <ctype.h>
/// A token source: every call yields the next token; once the input is
/// exhausted it yields an empty string (and keeps doing so on later calls).
using Tokenizer = std::function<std::string()>;

/// Builds a tokenizer that splits `i_str` into maximal runs of characters
/// belonging to the same class: alphabetic (`isalpha`), digit (`isdigit`),
/// punctuation (`ispunct`), or "unknown" (everything else, e.g. whitespace).
/// Runs of "unknown" characters are returned as tokens too.
///
/// The returned callable owns its own copy of the input, so it remains valid
/// after `i_str` is destroyed (including when a temporary was passed in).
///
/// @param i_str  Text to tokenize; may be empty.
/// @return Callable producing the tokens in order, then empty strings.
Tokenizer CreateTokenizer(const std::string& i_str)
{
    enum Type
    {
        eUnknown,
        eAlpha,
        eDigit,
        ePunct
    };

    auto classify = [](char ch) -> Type
    {
        // The <ctype.h> predicates require a value representable as
        // unsigned char; passing a negative plain char is undefined behaviour.
        const unsigned char uch = static_cast<unsigned char>(ch);
        if (isalpha(uch))
            return eAlpha;
        if (isdigit(uch))
            return eDigit;
        if (ispunct(uch))
            return ePunct;
        return eUnknown;
    };

    // Capture the text by value: the closure escapes this function, so a
    // reference to the caller's string could dangle.
    return [classify, text = i_str, idx = std::string::size_type{0}]() mutable
    {
        if (idx >= text.size())
            return std::string();

        const std::string::size_type begin = idx;
        const Type runType = classify(text[idx]);

        // Extend the run while the character class stays the same.
        do
        {
            ++idx;
        } while (idx < text.size() && classify(text[idx]) == runType);

        return text.substr(begin, idx - begin);
    };
}
/// Demo driver: tokenizes a fixed rule string and prints each token on its
/// own line as `token(<text>)`, stopping at the first empty token.
int main()
{
    const std::string input = "trades=4;pur1=short;typ1=put;pur2=long;typ2=put;sym2=sym1;exp2=exp1;qty2=qty1;str2>str1;pur3=long;typ3=call;sym3=sym1;exp3=exp1;qty3=qty1;str3>str2;pur4=short;typ4=call;sym4=sym1;exp4=exp1;qty4=qty1;str4>str3;enforce=all;";
    auto nextToken = CreateTokenizer(input);

    // An empty token is the tokenizer's end-of-input marker.
    for (std::string tok = nextToken(); !tok.empty(); tok = nextToken())
    {
        std::cout << "token(" << tok << ")" << std::endl;
    }

    return 0;
}
@artemkin
Copy link
Author

artemkin commented Nov 3, 2015

output:

token(trades)
token(=)
token(4)
token(;)
token(pur)
token(1)
token(=)
token(short)
token(;)
token(typ)
token(1)
token(=)
token(put)
token(;)
token(pur)
token(2)
token(=)
token(long)
token(;)
token(typ)
token(2)
token(=)
token(put)
token(;)
token(sym)
token(2)
token(=)
token(sym)
token(1)
token(;)
token(exp)
token(2)
token(=)
token(exp)
token(1)
token(;)
token(qty)
token(2)
token(=)
token(qty)
token(1)
token(;)
token(str)
token(2)
token(>)
token(str)
token(1)
token(;)
token(pur)
token(3)
token(=)
token(long)
token(;)
token(typ)
token(3)
token(=)
token(call)
token(;)
token(sym)
token(3)
token(=)
token(sym)
token(1)
token(;)
token(exp)
token(3)
token(=)
token(exp)
token(1)
token(;)
token(qty)
token(3)
token(=)
token(qty)
token(1)
token(;)
token(str)
token(3)
token(>)
token(str)
token(2)
token(;)
token(pur)
token(4)
token(=)
token(short)
token(;)
token(typ)
token(4)
token(=)
token(call)
token(;)
token(sym)
token(4)
token(=)
token(sym)
token(1)
token(;)
token(exp)
token(4)
token(=)
token(exp)
token(1)
token(;)
token(qty)
token(4)
token(=)
token(qty)
token(1)
token(;)
token(str)
token(4)
token(>)
token(str)
token(3)
token(;)
token(enforce)
token(=)
token(all)
token(;)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment