Skip to content

Instantly share code, notes, and snippets.

@kumailxp
Last active July 19, 2018 09:56
Show Gist options
  • Save kumailxp/b728edeaf79753ed227e7eb0455cd3fe to your computer and use it in GitHub Desktop.
Save kumailxp/b728edeaf79753ed227e7eb0455cd3fe to your computer and use it in GitHub Desktop.
calculate levenschtein distance and find possible match in vector
#include<iostream>
#include<vector>
#include<map>
#include<algorithm>
#include<iterator>
#include <fstream>
#include <cstdlib>
#include <chrono>
using namespace std;
class Timer
{
// make things readable
using clk = std::chrono::steady_clock;
clk::time_point b; // begin
clk::time_point e; // end
public:
void clear() { b = e = clk::now(); }
void start() { b = clk::now(); }
void stop() { e = clk::now(); }
friend std::ostream& operator<<(std::ostream& o, const Timer& timer)
{
return o << timer.secs();
}
// return time difference in seconds
double secs() const
{
if(e <= b)
return 0.0;
auto d = std::chrono::duration_cast<std::chrono::microseconds>(e - b);
return d.count() / 1000000.0;
}
};
// len_s and len_t are the number of characters in string s and t respectively
unsigned int LevenshteinDistance(string s, unsigned int len_s, string t, unsigned int len_t)
{
/* base case: empty strings */
if (len_s == 0) return len_t;
if (len_t == 0) return len_s;
/* test if last characters of the strings match */
int cost = (s[len_s-1] == t[len_t-1]) ? 0 : 1;
/* return minimum of delete char from s, delete char from t, and delete char from both */
return min(LevenshteinDistance(s, len_s - 1, t, len_t ) + 1,
min(LevenshteinDistance(s, len_s , t, len_t - 1) + 1,
LevenshteinDistance(s, len_s - 1, t, len_t - 1) + cost));
}
// len_s and len_t are the number of characters in string s and t respectively
unsigned int LevenshteinDistance_optmized(string s, unsigned int len_s, string t, unsigned int len_t)
{
/* base case: empty strings */
if (len_s == 0) return len_t;
if (len_t == 0) return len_s;
len_s = s.size();
len_t = t.size();
unsigned int d[len_s][len_t] = {0};
for(unsigned int i = 1; i < len_s; i++) {
d[i][0] = i;
}
for(unsigned int j = 1; j < len_t; j++) {
d[0][j] = j;
}
unsigned int subCost = 0;
for(unsigned j = 1; j < len_t; j++) {
for(unsigned int i = 1; i < len_s; i++) {
if(s.at(i) == t.at(j))
subCost = 0;
else
subCost = 1;
d[i][j] = min(d[i-1][j] + 1, min(d[i][j-1] + 1, d[i-1][j-1] + subCost));
}
}
return d[len_s-1][len_t-1];
}
static bool validateUserInput(string input) {
unsigned int i = 0;
while(i < input.size()) {
if(!isalnum(input.at(i))) {
cout << "invalid characters!" << endl;
return false;
}
i++;
}
if(input.empty())
return false;
return true;
}
static void loadWordListFile(const string filename, vector<string>& listofWords) {
ifstream fin(filename.c_str());
if(!fin) {
cout << "error: can\'t open the file!" << endl;
abort();
}
string data;
while(getline(fin, data)) {
listofWords.emplace_back(data);
}
}
int main() {
//cout << "Lev distance: "
//<< LevenshteinDistance("Kitten", 6, "sitting", 7) << endl;
// create a vector of random
//vector<string> listofWords = {"hello", "might", "power", "wonder"};
vector<string> listofWords;
loadWordListFile("words.txt", listofWords);
//cout << "Select one of the following words:" << endl;
//copy(listofWords.begin(), listofWords.end(), ostream_iterator<string>(cout, "\n"));
string userChoice;
cout << endl << endl <<"please type a word: " << endl;
if (!std::getline(std::cin, userChoice)) {
cout << "Invalid input" << endl;
return -1;
}
if(!validateUserInput(userChoice)) {
cout << "invalid input" << endl;
return -1;
}
if (find (listofWords.begin(), listofWords.end(), userChoice) != listofWords.end()) {
cout << "You entered an exactly correct word" << endl;
return 0;
}
Timer timer;
vector < pair<string, unsigned int> >lvDistMap;
timer.start();
transform(listofWords.begin(), listofWords.end(),
back_inserter(lvDistMap), [userChoice](string I) {
unsigned int lvDist =
LevenshteinDistance_optmized(userChoice, userChoice.size(), I, I.size());
return make_pair(I, lvDist);
});
timer.stop();
cout << "Time taken to find Levenshtein distance: " << timer << " secs" << endl;
cout << "====================================" << endl;
#if 0
cout << "==============================" << endl;
cout << "Sort and print the whole table" << endl;
cout << "==============================" << endl;
sort(lvDistMap.begin(), lvDistMap.end(),
[](const pair<string, unsigned int>& lhs,
const pair<string, unsigned int>& rhs) {
return lhs.second < rhs.second;
});
for(auto& I : lvDistMap) {
cout << "\'" << I.first << "\' has a Levenshtein distance of: " << I.second
<< " relative to \'" << userChoice << "\'" << endl;
}
cout << "==============================" << endl;
cout << "print minimum distance only" << endl;
cout << "==============================" << endl;
#endif
auto result = min_element(lvDistMap.begin(), lvDistMap.end(),
[](const pair<string, unsigned int>& lhs,
const pair<string, unsigned int>& rhs) {
return lhs.second < rhs.second;
});
if(result->second <= 5)
cout << "Perhaps you wanted to type \'" << result->first << "\'" << endl;
else
cout << "Can't find a close match" << endl;
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment