Skip to content

Instantly share code, notes, and snippets.

@bit-hack
Created June 14, 2017 15:37
Show Gist options
  • Save bit-hack/f816a9b6f971ef4198b7aadd0bed3759 to your computer and use it in GitHub Desktop.
Save bit-hack/f816a9b6f971ef4198b7aadd0bed3759 to your computer and use it in GitHub Desktop.
A very small expression tokenizer
#include <assert.h>
#include <string>
#include <vector>
// Returns true when `ch` is one of the single-character operator or
// parenthesis symbols the tokenizer emits as a token of its own:
//   ( ) + - / * % & |
static bool is_operator(const char ch) {
  static const char symbols[] = "()+-/*%&|";
  // Stop at the terminator instead of comparing against it, so that a '\0'
  // argument is never reported as an operator.
  for (const char *sym = symbols; *sym != '\0'; ++sym) {
    if (*sym == ch) {
      return true;
    }
  }
  return false;
}
// Returns true when `ch` may appear inside a value token: an ASCII letter,
// an ASCII digit, '$' or '_'.
static bool is_value(const char ch) {
  if (ch >= 'a' && ch <= 'z') {
    return true;
  }
  if (ch >= 'A' && ch <= 'Z') {
    return true;
  }
  if (ch >= '0' && ch <= '9') {
    return true;
  }
  return ch == '$' || ch == '_';
}
// Returns true for the separator characters the tokenizer skips:
// space, horizontal tab, carriage return and line feed.
static bool is_whitespace(const char ch) {
  switch (ch) {
  case ' ':
  case '\t':
  case '\r':
  case '\n':
    return true;
  default:
    return false;
  }
}
// Splits `input` into tokens and stores them, in order, in `out`.
//
// Tokenization rules:
//   * whitespace (see is_whitespace) separates tokens and is discarded;
//   * a maximal run of value characters (see is_value) forms one token;
//   * every operator (see is_operator) is a token of exactly one character;
//   * any other character is also emitted as a one-character token, so that
//     "a=b" and "a = b" tokenize identically instead of the unknown character
//     being glued onto the value that follows it.
//
// Parameters:
//   input - expression text. It is scanned as a C string, so scanning stops
//           at the first '\0'. It is taken by value on purpose: `out` is
//           cleared before scanning, and the private copy keeps the text
//           alive even if the caller passed an element of `out`.
//   out   - receives the tokens; any previous contents are discarded.
//
// Returns the number of tokens stored in `out`.
static size_t tokenize(std::string input, std::vector<std::string> &out) {
  out.clear();
  const char *cur = input.c_str();
  while (*cur != '\0') {
    const char ch = *cur;
    // separators carry no information of their own
    if (is_whitespace(ch)) {
      ++cur;
      continue;
    }
    // operators, and characters no classifier recognises, stand alone
    if (is_operator(ch) || !is_value(ch)) {
      out.push_back(std::string(1, ch));
      ++cur;
      continue;
    }
    // consume the whole run of value characters; is_value('\0') is false, so
    // the scan cannot run past the end of the string
    const char *const start = cur;
    while (is_value(*cur)) {
      ++cur;
    }
    out.emplace_back(start, cur);
  }
  // return number of parsed tokens
  return out.size();
}
// Test helper: trips an assertion when the observed count `in` differs from
// the expected count `exp`; does nothing when they match.
void expect(size_t exp, size_t in) {
  if (exp == in) {
    return;
  }
  assert(!"Fail");
}
// Self-test: tokenizes a fixed set of expressions and checks the number of
// tokens produced for each one.
int main() {
  struct Case {
    size_t count;     // expected number of tokens
    const char *text; // expression to tokenize
  };
  const Case cases[] = {
      {9, "$thing + 4*( $var - 0x2345)"},
      {3, "$thing + 1234"},
      {5, "$aa ++ -0x3458"},
      {4, "$a $b_c 1234 0xsdf "},
      {2, " + - "},
      {11, "((1234+45)-1234)*10"},
  };
  // the same output vector is reused; tokenize() clears it on every call
  std::vector<std::string> out;
  for (const Case &c : cases) {
    expect(c.count, tokenize(std::string{c.text}, out));
  }
  return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment