Created
September 24, 2014 10:01
-
-
Save earlwlkr/2f96239389e8aba120bc to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// By Earl Walker - 1212156
#include <cctype>
#include <iostream>
#include <locale>
#include <string>
#include <vector>
using namespace std; | |
// Uncomment to read the passage from standard input instead of using the built-in sample text.
//#define MANUALLY_INPUT
// Build a word from the characters of @passage that lie strictly between
// index @from and index @to (both exclusive).
//
// Returns an empty string when the range is empty or inverted, or when any
// character in the range is a space (the range then spans more than one word).
// The range is clamped to the bounds of @passage, so out-of-range indices never
// read past the string.
std::string to_word(const std::string& passage, int from, int to)
{
    const int length = static_cast<int>(passage.size());
    const int first = (from < -1) ? 0 : from + 1;
    const int last = (to > length) ? length : to;

    std::string word;
    // `<` rather than `!=`: an empty or inverted range must terminate immediately.
    for (int i = first; i < last; ++i)
    {
        if (passage[i] == ' ')
            return std::string();
        word.push_back(passage[i]);
    }
    return word;
}
// Build a sentence from the elements of @passage that lie strictly between
// index @from and index @to (both exclusive).
//
// Elements are joined with a single space and a '.' is appended after the last
// one. Returns an empty string when the range is empty or inverted. The range
// is clamped to the bounds of @passage, so out-of-range indices never read past
// the vector.
std::string to_sentence(const std::vector<std::string>& passage, int from, int to)
{
    const int count = static_cast<int>(passage.size());
    const int first = (from < -1) ? 0 : from + 1;
    const int last = (to > count) ? count : to;

    std::string sentence;
    // `<` rather than `!=`: an empty or inverted range must terminate immediately.
    for (int i = first; i < last; ++i)
    {
        sentence.append(passage[i]);
        // Separator between elements, terminator after the final one.
        sentence.append(i != last - 1 ? " " : ".");
    }
    return sentence;
}
// Return true when @s is non-empty and consists solely of decimal digits.
// Signs, decimal points and whitespace make the result false.
bool is_number(const std::string& s)
{
    if (s.empty())
        return false;

    for (std::string::const_iterator it = s.begin(); it != s.end(); ++it)
    {
        // std::isdigit has undefined behaviour for negative arguments, so a
        // plain char (which may be signed) must go through unsigned char first.
        if (!std::isdigit(static_cast<unsigned char>(*it)))
            return false;
    }
    return true;
}
void main() | |
{ | |
string passage; | |
#ifdef MANUALLY_INPUT | |
cout << "Input passage: "; | |
getline(cin, passage); | |
#else | |
passage = " Parts 1 and 3 can be easily done with pillow, \ | |
but part 2 can be a little tricky(like 20 mins of experimentation). \ | |
Fortunately, this is Python and someone on the internet has already spent those 20 minutes. \ | |
Mr. Watson says 0.2, 0.3, 1.2, etc. are not the end of this sentence. \ | |
We use this image_utils module that does exactly what we need. \ | |
To make it work for pillow(it is written for PIL), take a look at porting pil code to pillow. \ | |
It is just a matter of changing the imports. The example code in the link is quite self explanatory."; | |
#endif | |
vector<string> objects; | |
int last_index = -1; | |
for (size_t size = passage.length(), i = 1; i != size; i++) | |
{ | |
if (passage[i] == ' ' || passage[i] == '.' || passage[i] == '?' || passage[i] == '!' || passage[i] == '\n') | |
{ | |
string word = to_word(passage, last_index, i); | |
if (word != "") | |
objects.push_back(word); | |
if (passage[i] != ' ') | |
objects.push_back("."); | |
last_index = i; | |
} | |
} | |
vector<string> IGNORE_PREFIX_LIST = {"mr", "mrs", "dr"}; | |
vector<string> IGNORE_SUFFIX_LIST = { "txt", "exe", "rar", "zip", "mp3", "mp4" }; | |
vector<string> sentences; | |
last_index = -1; | |
bool is_end_of_sentence; | |
for (size_t size = objects.size(), i = 0; i != size; i++) | |
{ | |
//cout << objects[i] << '\n'; | |
if (objects[i] == ".") | |
{ | |
int prev = i - 1; | |
is_end_of_sentence = true; | |
if (prev >= 0) | |
{ | |
for (int d = 0; d != IGNORE_PREFIX_LIST.size(); d++) | |
{ | |
if (_strcmpi(objects[prev].c_str(), IGNORE_PREFIX_LIST[d].c_str()) == 0) | |
{ | |
is_end_of_sentence = false; | |
break; | |
} | |
} | |
if (is_end_of_sentence) | |
{ | |
int next = i + 1; | |
if (next < size) | |
{ | |
for (int d = 0; d != IGNORE_SUFFIX_LIST.size(); d++) | |
{ | |
if (_strcmpi(objects[next].c_str(), IGNORE_SUFFIX_LIST[d].c_str()) == 0) | |
{ | |
is_end_of_sentence = false; | |
break; | |
} | |
} | |
if (is_number(objects[prev]) && is_number(objects[next])) | |
{ | |
is_end_of_sentence = false; | |
} | |
else if (objects[prev] == "etc" && islower(objects[next][0])) | |
{ | |
is_end_of_sentence = false; | |
} | |
} | |
} | |
} | |
if (is_end_of_sentence) | |
{ | |
sentences.push_back(to_sentence(objects, last_index, i)); | |
last_index = i; | |
} | |
} | |
} | |
for (size_t size = sentences.size(), i = 0; i != size; i++) | |
{ | |
cout << sentences[i] << '\n'; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment