Skip to content

Instantly share code, notes, and snippets.

@Wizmann
Created December 13, 2014 07:59
Show Gist options
  • Save Wizmann/35ebc4da3b8c074a0fb3 to your computer and use it in GitHub Desktop.
Save Wizmann/35ebc4da3b8c074a0fb3 to your computer and use it in GitHub Desktop.
simple_lexer.cc
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <algorithm>
#include <vector>
#include <map>
#include <unordered_map>
using namespace std;
#define print(x) cout << x << endl
#define input(x) cin >> x
const vector<string> keywords = {
"auto", "break", "case", "char",
"const", "continue", "default", "do",
"double", "else", "enum", "extern",
"float", "for", "goto", "if",
"int", "long", "register", "return",
"short", "signed", "sizeof", "static",
"struct", "switch", "typedef", "union",
"unsigned", "void", "volatile", "while"
};
class Trie {
public:
void build(const vector<string>& str_vec) {
trie_init();
for (const auto& str: str_vec) {
trie_insert(str);
}
}
int search(int status, char c) {
auto& mp = _trie[status];
if (mp.find(c) == mp.end()) {
return -1;
}
return mp[c];
}
bool is_keyword(int status) {
if (status == -1) {
return false;
}
auto& mp = _trie[status];
return mp.find('\0') != mp.end();
}
private:
void trie_init() {
_trie.clear();
_trie.reserve(10000);
_trie.push_back(unordered_map<char, int>());
}
void trie_insert(const string& str) {
int ptr = 0;
for (auto c: str) {
auto& mp = _trie[ptr];
if (mp.find(c) == mp.end()) {
_trie.push_back(unordered_map<char, int>());
mp[c] = _trie.size() - 1;
}
ptr = mp[c];
}
auto& mp = _trie[ptr];
mp['\0'] = 1; // mark this as the end of a keyword
}
private:
vector<unordered_map<char, int> > _trie;
};
class TrieAdapter {
public:
TrieAdapter(Trie& trie): _status(0), _trie(trie) {}
void add(const char c) {
if (_status == -1) {
return;
}
_status = _trie.search(_status, c);
}
bool is_keyword() {
return _trie.is_keyword(_status);
}
void reset() {
_status = 0;
}
private:
int _status;
Trie& _trie;
};
class Word {
public:
Word(): _row(0), _col(0) {}
Word(int row, int col): _row(row), _col(col), _word() {}
bool is_empty() {
return _word.empty();
}
void add(const char c) {
_word += c;
}
void show(bool is_keyword) {
if (is_keyword) {
printf("%s\t(%d, %d)",
_word.c_str(), _row, _col);
} else {
printf("ID(%s)\t(%d, %d)",
_word.c_str(), _row, _col);
}
puts("");
}
void clear() {
_word = string();
}
private:
int _row, _col;
string _word;
};
int main() {
freopen("input.txt", "r", stdin);
char c;
int row = 0, col = 0;
Trie trie;
trie.build(keywords);
TrieAdapter adapter(trie);
Word word;
while (true) {
c = '\0';
bool is_eof = !cin.get(c);
if (word.is_empty()) {
word = Word(row, col);
}
if (c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\0') {
if (!word.is_empty()) {
bool is_keyword = adapter.is_keyword();
word.show(is_keyword);
}
word.clear();
adapter.reset();
} else {
adapter.add(c);
word.add(c);
}
if (c == '\n') {
row++;
col = 0;
} else {
col++;
}
if (is_eof) {
break;
}
}
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment