Skip to content

Instantly share code, notes, and snippets.

@ror3d
Created December 9, 2013 00:31
Show Gist options
  • Save ror3d/7865655 to your computer and use it in GitHub Desktop.
Save ror3d/7865655 to your computer and use it in GitHub Desktop.
Sample code for Andreu's project. It should read the data inside the <text> tag of a simple XML file and split it into words.
void readFile()
{
ifstream file;
char c;
enum ReadState {
StartFile, EndFile, Tag, Text
};
ReadState state = StartFile;
ReadState lastState = StartFile;
stringstream ss;
while(file)
{
file >> c;
switch(state)
{
case StartFile:
if(c=='<')
{
ss.str("");
state = Tag;
lastState = StartFile;
}
break;
case Tag:
if(c=='>')
{
if(ss.str().INSENSITIVE_EQUAL("text")) // Implement INSENSITIVE_EQUAL
{
state = Text;
}
else
{
state = lastState;
}
}
break;
case Text:
readText(file);
state = EndFile;
break;
}
}
}
void readText(istream file)
{
stringstream ss;
char c;
bool hasAt = false;
bool startsWithLT = false;
while(file)
{
file >> c;
if(ss.str().length() == 0)
{
startsWithLT = (c == '<');
}
if(c==' ' || c=='\n' || c=='\r' || c=='\t') // Add punctuation too?
{
if(startsWithLT && ss.str().INSENSITIVE_EQUAL("</text>"))
{
return;
}
if(hasAt)
{
// Check if it's an email
}
// Here we would save the word somewhere
useWord(ss.str());
hasAt = false;
ss.str("");
}
else
{
hasAt = hasAt || (c == '@');
ss << c;
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment