Skip to content

Instantly share code, notes, and snippets.

@janisozaur
Created May 26, 2011 18:51
Show Gist options
  • Save janisozaur/993772 to your computer and use it in GitHub Desktop.
Save janisozaur/993772 to your computer and use it in GitHub Desktop.
tokenizer
#include <iostream>
#include <fstream>
#include <string>
#include <cassert>
using namespace std;
// Normalizes the letter case of a single whitespace-free token.
//
// Tokens that already start with an ASCII uppercase letter are returned
// untouched. For any other token, every ASCII uppercase letter after the
// first character is lowercased, and the first character is capitalized
// when the previously transformed token ended with a '.'.
//
// The "previous token ended with '.'" state is kept in a static member, so
// it is shared by all Transformer instances and survives their destruction.
class Transformer
{
public:
    // Returns the case-normalized copy of `str`.
    //
    // An empty `str` is returned as-is and does not touch the sentence
    // state. Only ASCII letters are recognized; all other bytes are
    // copied through unchanged.
    string transform(string str)
    {
        if (str.empty())
            return str;

        // Distance between an ASCII lowercase letter and its uppercase form.
        const char caseOffset = 'a' - 'A';
        const char first = str[0];
        const bool endsSentence = (str[str.length() - 1] == '.');

        if (first >= 'A' && first <= 'Z')
        {
            // An already-capitalized token is kept verbatim, but it still
            // consumes a pending sentence start. Without this reset the
            // flag would leak past this token and capitalize a later word
            // in the middle of the sentence.
            sDot = endsSentence;
            return str;
        }

        if (sDot && first >= 'a' && first <= 'z')
        {
            str[0] = static_cast<char>(first - caseOffset);
            sDot = false;
        }

        for (string::size_type i = 1; i < str.length(); ++i)
        {
            if (str[i] >= 'A' && str[i] <= 'Z')
            {
                str[i] = static_cast<char>(str[i] + caseOffset);
            }
        }

        // A token that starts with a non-letter does not clear a pending
        // sentence start; the flag is only (re)armed here.
        if (endsSentence)
        {
            sDot = true;
        }
        return str;
    }

private:
    // True when the most recent sentence-ending token (one whose last
    // character is '.') has not yet been followed by a token starting
    // with an ASCII letter.
    static bool sDot;
};

bool Transformer::sDot = false;
class Bufor
{
public:
Bufor(string str) :
mData(str)
{
}
string parse ()
{
string temp;
Transformer t;
string result;
for (unsigned int i = 0; i < mData.length(); i++)
{
switch (mData.at(i)){
case ' ':
case '\t':
result += t.transform(temp) + mData.at(i);
temp.resize(0);
break;
default:
temp += mData.at(i);
}
}
result += t.transform(temp);
return result;
}
private:
string mData;
};
// Reads the file named by the first command-line argument line by line,
// runs every line through Bufor::parse and prints the result to stdout.
//
// Returns 0 on success and 1 when the argument is missing or the file
// cannot be opened.
int main(int argc, char *argv[])
{
    // Checked at run time rather than with assert(), which is compiled out
    // under NDEBUG and would leave argv[1] unchecked.
    if (argc < 2)
    {
        cerr << "usage: " << (argc > 0 ? argv[0] : "tokenizer") << " <file>" << endl;
        return 1;
    }

    ifstream plik(argv[1]);
    if (!plik)
    {
        cerr << "cannot open file: " << argv[1] << endl;
        return 1;
    }

    string wypis;
    // Loop on the result of getline itself: testing eof() before reading
    // processes one bogus empty line after the last real one, and never
    // terminates when the stream is in a failed (non-eof) state.
    while (getline(plik, wypis))
    {
        Bufor b(wypis);
        cout << b.parse() << endl;
    }

    plik.close();
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment