Skip to content

Instantly share code, notes, and snippets.

@shoooe
Created April 30, 2015 06:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save shoooe/1ea1ca26cc835b283db6 to your computer and use it in GitHub Desktop.
Save shoooe/1ea1ca26cc835b283db6 to your computer and use it in GitHub Desktop.
Some random corrector which, given a dictionary file with "<word> <frequency>" space separated pairs and fed with a word, returns the closest word of distance 1 with the highest frequency.
#include <iostream>
#include <fstream>
#include <map>
#include <stdexcept>
#include <vector>
namespace a8 {
// aliases
using word = std::string;
using dict = std::map<word, unsigned int>;
// generates all similar strings by removing 1 character
// from the original string in different places
template<typename OutIt>
OutIt generate_similar_by_deletion(a8::word const& word, OutIt first) {
std::cout << "Generating by deletetion...\n";
for (std::size_t i = 0; i < word.size() - 1; i++) {
a8::word curr = word;
curr.erase(i, 1);
std::cout << "\tGenerated: " << curr << '\n';
*first++ = curr;
}
return first;
}
// generates all similar strings by substituting 1 character
// from the alphabet in different positions
template<typename OutIt>
OutIt generate_similar_by_substitution(a8::word const& word, OutIt first) {
std::cout << "Generating by substitution...\n";
for (std::size_t i = 0; i < word.size(); i++) {
for (char c = 'A'; c <= 'Z'; c++) {
a8::word curr = word;
curr.replace(i, 1, 1, c); // inserts for 1 time at position i the character c for 1 time
std::cout << "\tGenerated: " << curr << '\n';
*first++ = curr;
}
}
return first;
}
// generates all similar strings by inserting 1 character
// from the alphabet in different positions
template<typename OutIt>
OutIt generate_similar_by_insertion(a8::word const& word, OutIt first) {
std::cout << "Generating by insertion...\n";
for (std::size_t i = 0; i < word.size() + 1; i++) {
for (char c = 'A'; c <= 'Z'; c++) {
a8::word curr = word;
curr.insert(i, 1, c); // inserts at i
std::cout << "\tGenerated: " << curr << '\n';
*first++ = curr;
}
}
return first;
}
// generates all similar string with distance of 1,
// by deletion, substitution and insertion
template<typename OutIt>
OutIt generate_similar(a8::word const& word, OutIt begin) {
auto begin1 = generate_similar_by_deletion(word, begin);
auto begin2 = generate_similar_by_substitution(word, begin1);
auto begin3 = generate_similar_by_insertion(word, begin2);
return begin3;
}
// returns a copy of the value with the given key or
// the passed in value if not found
template<typename Dict, typename Key, typename Value>
Value at_or(Dict const& map, Key const& key, Value other) {
try { return map.at(key); }
catch (std::out_of_range const&) { return other; }
}
// finds the iterator to the best match in the dictionary if it exists,
// otherwise it returns the end iterator
template<typename Dict>
typename Dict::iterator best_match(Dict& dictionary, a8::word const& word) {
auto it = dictionary.find(word);
if (it != dictionary.end()) return it;
std::vector<a8::word> similar;
generate_similar(word, std::back_inserter(similar));
auto similar_it = std::max_element(similar.begin(), similar.end(),
[&dictionary](a8::word const& a, a8::word const& b) {
return at_or(dictionary, a, 0u) < at_or(dictionary, b, 0u); // @todo: probably not optimal
});
if (similar_it == similar.end()) return dictionary.end();
else return dictionary.find(*similar_it);
}
// takes an input stream with space separated pairs
// representing the word and the frequency of such word
a8::dict make_dictionary(std::istream& input) {
a8::word word;
unsigned int count;
a8::dict dictionary;
while (input >> word && input >> count) {
dictionary[word] = count;
}
return dictionary;
}
// runs the user interface for the program
// asking for words and replying to them one by one
void run_corrector(dict& dictionary) {
std::string word;
while (std::cin >> word) {
auto it = best_match(dictionary, word);
if (it != dictionary.end()) {
std::cout << it->first << ' ' << it->second << '\n';
it->second++;
} else {
std::cout << "-\n";
dictionary[word] = 1;
}
}
}
}
int main(int argc, char* argv[]) {
if (argc != 2) throw std::runtime_error("Filename not passed in");
std::string filename = argv[1];
std::ifstream filestream(filename);
a8::dict dictionary = a8::make_dictionary(filestream);
a8::run_corrector(dictionary);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment