Skip to content

Instantly share code, notes, and snippets.

@tralamazza
Created August 15, 2011 08:43
Show Gist options
  • Save tralamazza/1145911 to your computer and use it in GitHub Desktop.
Save tralamazza/1145911 to your computer and use it in GitHub Desktop.
breathalizer
/*
* breathalizer.cpp
*
* Created on: Aug 13, 2011
* Author: Daniel Tralamazza <daniel@tralamazza.com>
* URL: http://www.facebook.com/careers/puzzles.php?puzzle_id=17
*/
#define NORVIG
#define FACEBOOK
#include <string>
#include <iostream>
#include <map>
#include <vector>
#include <algorithm>
#include <sstream>
#include <fstream>
#ifdef NORVIG
#include <queue>
#include <utility>
#include <set>
#endif
using namespace std;
typedef map<string, int> map_str_int_type;
typedef map<int, vector<string> > map_int_vstr;
static map_str_int_type uwords;
static map_int_vstr idict;
#ifdef NORVIG
typedef set<string> set_str;
static set_str dict;
#endif
#ifdef FACEBOOK
static void read_input(const char* fn) {
ifstream is(fn, ifstream::in);
stringstream ss;
string line, w;
while (getline(is, line)) {
ss.str(line);
while (ss >> w) {
transform(w.begin(), w.end(), w.begin(), ::toupper); // upper case
uwords[w] = uwords[w] + 1; // frequency
}
}
}
static void read_dict() {
ifstream is("/var/tmp/twl06.txt", ifstream::in);
stringstream ss;
string line;
while (getline(is, line)) {
ss.str(line);
ss >> line;
idict[line.size()].push_back(line);
#ifdef NORVIG
dict.insert(line);
#endif
}
}
#else // FACEBOOK
static void read_input_dict(const char* fn) {
ifstream is(fn, ifstream::in);
stringstream ss;
string line, w;
bool read_phrase = true;
while (getline(is, line)) {
if (line.empty())
read_phrase = false;
ss.str(line);
if (read_phrase) {
while (ss >> w) {
transform(w.begin(), w.end(), w.begin(), ::toupper);
uwords[w] = uwords[w] + 1;
}
} else {
ss >> line;
idict[line.size()].push_back(line);
#ifdef NORVIG
dict.insert(line);
#endif
}
}
}
#endif // FACEBOOK
typedef pair<string, int> word_k;
#ifdef NORVIG
static inline int push_if_not(set<string>& visited, queue<word_k>& q, const string& w, const int k) {
if (w.empty()) return 0;
if (visited.count(w) == 0) {
q.push(make_pair(w, k));
visited.insert(w);
}
return dict.count(w);
}
static int shuffle_word(const string &w) {
set<string> visited;
queue<word_k> bfs_q;
if (push_if_not(visited, bfs_q, w, 0))
return 0;
while (!bfs_q.empty()) {
const string wk = bfs_q.front().first;
const unsigned int& k = bfs_q.front().second + 1;
const string::size_type wlen = wk.size();
bfs_q.pop();
for (string::size_type i = 0; i <= wlen; ++i) {
const string& sub_0i = (i < wlen) ? wk.substr(0, i) : wk;
const string& sub_i1n = (i + 1 < wlen) ? wk.substr(i + 1) : "";
const string& sub_in = (i < wlen) ? wk.substr(i) : "";
if ( push_if_not(visited, bfs_q, sub_0i + sub_i1n, k) ) return k;
for (char c = 'A'; c <= 'Z'; ++c) {
if ( push_if_not(visited, bfs_q, sub_0i + c + sub_i1n, k) ) return k;
if ( push_if_not(visited, bfs_q, sub_0i + c + sub_in, k) ) return k;
}
}
}
return -1;
}
#endif
static int levenshtein(const string& s1, const string& s2, const int k) {
int s1n = s1.size();
int s2n = s2.size();
const char* s1_c = s1.c_str();
const char* s2_c = s2.c_str();
while (s2n > 0 && s1n > 0 && *s1_c == *s2_c) {
s2n--;
s1n--;
s1_c++;
s2_c++;
}
while (s2n > 0 && s1n > 0 && s1_c[s2n-1] == s2_c[s1n-1]) {
s2n--;
s1n--;
}
if (s2n == 0) return s1n;
if (s1n == 0) return s2n;
int d[s1n + 1][s2n + 1];
for(int j = 0; j <= s2n; ++j)
d[0][j] = j;
for (int i = 1; i <= s1n; ++i) {
d[i][0] = i;
const char si = s1_c[i - 1];
for (int j = 1; j <= s2n; ++j) {
const int cost = (si == s2_c[j - 1]) ? 0 : 1;
int min = d[i - 1][j - 1] + cost;
const int a = d[i - 1][j] + 1;
const int b = d[i][j - 1] + 1;
if (a < min) min = a;
if (b < min) min = b;
d[i][j] = min;
}
}
return d[s1n][s2n];
}
static int scan_dict(const int dict, const string& w, int min) {
const vector<string> &vd = idict[dict];
if (vd.empty())
return min; // no dictionary for this size
int dist = 0;
string min_w;
for (vector<string>::const_iterator id = vd.begin(); id != vd.end(); id++) {
dist = levenshtein(w, (*id), min);
if (dist < min) {
min = dist;
min_w = (*id);
}
if (min == 0)
break; // found perfect match
}
return min;
}
int main(int argc, char** args) {
#ifdef FACEBOOK
read_input(args[1]);
read_dict();
#else
read_input_dict(args[1]);
#endif
int sum = 0;
for (map_str_int_type::const_iterator uw = uwords.begin(); uw != uwords.end(); ++uw) {
const string w = (*uw).first;
int min;
#ifdef NORVIG
if (w.size() > 4) {
#endif
const int len = w.size();
min = scan_dict(len, w, len);
for (int k = 1; ; ++k) {
if (k > min || min == 0) break;
min = scan_dict(len + k, w, min);
if (k > min || min == 0) break;
min = scan_dict(len - k, w, min);
}
#ifdef NORVIG
} else
min = shuffle_word(w);
#endif
sum += min * (*uw).second;
}
cout << sum << endl;
return 0;
}
@tralamazza
Copy link
Author

typedef pair<string, int> word_k;

static inline int push_if_not(set<string>& visited, queue<word_k>& q, const string& w, const int k) {
    if (visited.count(w) == 0) {
        q.push(make_pair(w, k));
        visited.insert(w);
    }
    return dict.count(w);
}

static int shuffle_word(const string &w) {
    set<string> visited;
    queue<word_k> bfs_q;
    if (push_if_not(visited, bfs_q, w, 0))
        return 0;
    while (!bfs_q.empty()) {
        const string wk = bfs_q.front().first;
        const unsigned int& k = bfs_q.front().second + 1;
        const string::size_type wlen = wk.size();
        bfs_q.pop();

        for (string::size_type i = 0; i <= wlen; ++i) {
            const string& sub_0i = (i < wlen) ? wk.substr(0, i) : wk;
            const string& sub_i1n = (i + 1 < wlen) ? wk.substr(i + 1) : "";
            const string& sub_in = (i < wlen) ? wk.substr(i) : "";

            if (sub_0i.empty() && sub_i1n.empty()) continue;

            if ( push_if_not(visited, bfs_q, sub_0i + sub_i1n, k) ) return k;
            for (char c = 'A'; c <= 'Z'; ++c) {
                if ( push_if_not(visited, bfs_q, sub_0i + c + sub_i1n, k) ) return k;
                if ( push_if_not(visited, bfs_q, sub_0i + c + sub_in, k) ) return k;
            }
        }
    }
    return -1;
}

Norvig's Spell-correct port. Good for small words.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment