Skip to content

Instantly share code, notes, and snippets.

@earlwlkr
Created September 24, 2014 10:01
Show Gist options
  • Save earlwlkr/2f96239389e8aba120bc to your computer and use it in GitHub Desktop.
Save earlwlkr/2f96239389e8aba120bc to your computer and use it in GitHub Desktop.
// By Earl Walker - 1212156
#include <cctype>
#include <iostream>
#include <locale>
#include <string>
#include <vector>
using namespace std;
// Uncomment to read the passage from standard input instead of using the built-in sample text.
//#define MANUALLY_INPUT
// Extract the single word that lies strictly between two separator positions.
//
// passage : text to read from. Taken by const reference because the function
//           only reads it; taking it by value copied the whole passage on
//           every call.
// from    : index of the separator preceding the word; the word starts at
//           from + 1. Pass -1 to start at the beginning of the passage.
// to      : index one past the last character of the word (exclusive).
//
// Returns the characters in [from + 1, to). Returns an empty string when that
// span is empty or inverted, or when it contains a space (the span is then not
// a single word). Bounds that fall outside the passage are clamped rather than
// read out of range.
std::string to_word(const std::string& passage, int from, int to)
{
    const long long length = static_cast<long long>(passage.size());

    // Clamp the half-open span [first, last) to the passage.
    long long first = static_cast<long long>(from) + 1;
    if (first < 0)
        first = 0;
    long long last = to;
    if (last > length)
        last = length;
    if (first >= last)
        return std::string();

    const std::string::size_type begin = static_cast<std::string::size_type>(first);
    const std::string::size_type end = static_cast<std::string::size_type>(last);

    // A space anywhere inside the span disqualifies it as a word.
    const std::string::size_type space = passage.find(' ', begin);
    if (space != std::string::npos && space < end)
        return std::string();

    return passage.substr(begin, end - begin);
}
// Join a run of tokens into one sentence.
//
// passage : list of tokens. Taken by const reference because the function
//           only reads it; taking it by value copied every token on each call.
// from    : index of the token preceding the sentence; the sentence starts at
//           from + 1. Pass -1 to start at the first token.
// to      : index one past the last token of the sentence (exclusive).
//
// Returns the tokens in [from + 1, to) separated by single spaces and
// terminated with '.'. Returns an empty string (without a '.') when the span
// is empty or inverted. Bounds that fall outside the token list are clamped
// rather than read out of range.
std::string to_sentence(const std::vector<std::string>& passage, int from, int to)
{
    const long long available = static_cast<long long>(passage.size());

    // Clamp the half-open span [first, last) to the token list.
    long long first = static_cast<long long>(from) + 1;
    if (first < 0)
        first = 0;
    long long last = to;
    if (last > available)
        last = available;

    std::string sentence;
    for (long long i = first; i < last; ++i)
    {
        sentence += passage[static_cast<std::vector<std::string>::size_type>(i)];
        // Space between tokens, full stop after the final one.
        sentence += (i + 1 < last) ? ' ' : '.';
    }
    return sentence;
}
// Report whether a token is a non-empty run of decimal digits.
//
// s : token to inspect.
//
// Returns true only when s is non-empty and every character is a digit.
// Signs, decimal points and trailing punctuation make the result false.
bool is_number(const std::string& s)
{
    if (s.empty())
        return false;

    for (const char c : s)
    {
        // std::isdigit requires a value representable as unsigned char;
        // passing a negative plain char (any non-ASCII byte) is undefined.
        if (!std::isdigit(static_cast<unsigned char>(c)))
            return false;
    }
    return true;
}
// Case-insensitive equality of two tokens. Portable replacement for the
// MSVC-only `_strcmpi(a, b) == 0`.
static bool equals_ignore_case(const std::string& lhs, const std::string& rhs)
{
    if (lhs.size() != rhs.size())
        return false;
    for (std::string::size_type k = 0; k < lhs.size(); ++k)
    {
        if (std::tolower(static_cast<unsigned char>(lhs[k])) !=
            std::tolower(static_cast<unsigned char>(rhs[k])))
            return false;
    }
    return true;
}

// True when `token` equals any entry of `candidates`, ignoring case.
static bool matches_any_ignore_case(const std::string& token,
                                    const std::vector<std::string>& candidates)
{
    for (const std::string& candidate : candidates)
    {
        if (equals_ignore_case(token, candidate))
            return true;
    }
    return false;
}

// Copy of `token` with trailing punctuation removed ("2," -> "2"), so that a
// number followed by a comma or bracket is still recognised as a number.
static std::string without_trailing_punctuation(const std::string& token)
{
    std::string::size_type end = token.size();
    while (end > 0 && std::ispunct(static_cast<unsigned char>(token[end - 1])))
        --end;
    return token.substr(0, end);
}

// Split a passage into sentences and print one sentence per line.
//
// Pass 1 tokenizes the passage: words are the runs between separators
// (space, '.', '?', '!', newline), and every non-space separator is recorded
// as a "." token.
// Pass 2 walks the tokens and decides, for each "." token, whether it really
// ends a sentence. It does not when it follows a title (Mr/Mrs/Dr), precedes
// a known file extension, sits between two numbers (a decimal point), or
// follows "etc" and precedes a lower-case word.
//
// Sentences are rebuilt by to_sentence, which joins tokens with single
// spaces; "." tokens that did not end a sentence therefore appear surrounded
// by spaces in the output.
int main()
{
    std::string passage;
#ifdef MANUALLY_INPUT
    std::cout << "Input passage: ";
    std::getline(std::cin, passage);
#else
    passage =
        " Parts 1 and 3 can be easily done with pillow, "
        "but part 2 can be a little tricky(like 20 mins of experimentation). "
        "Fortunately, this is Python and someone on the internet has already spent those 20 minutes. "
        "Mr. Watson says 0.2, 0.3, 1.2, etc. are not the end of this sentence. "
        "We use this image_utils module that does exactly what we need. "
        "To make it work for pillow(it is written for PIL), take a look at porting pil code to pillow. "
        "It is just a matter of changing the imports. The example code in the link is quite self explanatory.";
#endif

    // ---- Pass 1: tokenize ------------------------------------------------
    std::vector<std::string> objects;
    const std::size_t length = passage.length();
    int last_index = -1; // position of the most recent separator

    // Start at index 0: starting at 1 ran off the end of an empty passage and
    // dropped the first word whenever the passage began with a separator.
    for (std::size_t i = 0; i < length; ++i)
    {
        const char c = passage[i];
        const bool is_separator =
            c == ' ' || c == '.' || c == '?' || c == '!' || c == '\n';
        if (!is_separator)
            continue;

        const std::string word = to_word(passage, last_index, static_cast<int>(i));
        if (!word.empty())
            objects.push_back(word);
        if (c != ' ')
            objects.push_back(".");
        last_index = static_cast<int>(i);
    }

    // Keep a final word that is not followed by any separator.
    if (last_index + 1 < static_cast<int>(length))
    {
        const std::string word = to_word(passage, last_index, static_cast<int>(length));
        if (!word.empty())
            objects.push_back(word);
    }

    // ---- Pass 2: group tokens into sentences -----------------------------
    const std::vector<std::string> IGNORE_PREFIX_LIST = { "mr", "mrs", "dr" };
    const std::vector<std::string> IGNORE_SUFFIX_LIST = { "txt", "exe", "rar", "zip", "mp3", "mp4" };
    std::vector<std::string> sentences;
    const std::size_t count = objects.size();
    last_index = -1; // position of the most recent sentence-ending token

    for (std::size_t i = 0; i < count; ++i)
    {
        if (objects[i] != ".")
            continue;

        bool is_end_of_sentence = true;
        if (i > 0)
        {
            const std::string& prev = objects[i - 1];
            if (matches_any_ignore_case(prev, IGNORE_PREFIX_LIST))
            {
                // Title such as "Mr." - the dot belongs to the abbreviation.
                is_end_of_sentence = false;
            }
            else if (i + 1 < count)
            {
                const std::string& next = objects[i + 1];
                if (matches_any_ignore_case(next, IGNORE_SUFFIX_LIST))
                {
                    // File name such as "song.mp3".
                    is_end_of_sentence = false;
                }
                else if (is_number(prev) && is_number(without_trailing_punctuation(next)))
                {
                    // Decimal point such as "0.2" (also "0.2," in a list).
                    is_end_of_sentence = false;
                }
                else if (prev == "etc" && std::islower(static_cast<unsigned char>(next[0])))
                {
                    // "etc." followed by a lower-case word continues the sentence.
                    is_end_of_sentence = false;
                }
            }
        }

        if (is_end_of_sentence)
        {
            // Consecutive terminators ("!!", "...") would yield empty sentences.
            if (last_index + 1 < static_cast<int>(i))
                sentences.push_back(to_sentence(objects, last_index, static_cast<int>(i)));
            last_index = static_cast<int>(i);
        }
    }

    // Keep trailing tokens that were never closed by a terminator.
    if (last_index + 1 < static_cast<int>(count))
        sentences.push_back(to_sentence(objects, last_index, static_cast<int>(count)));

    for (const std::string& sentence : sentences)
        std::cout << sentence << '\n';

    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment