Skip to content

Instantly share code, notes, and snippets.

@brimston3
Created October 15, 2018 19:19
Show Gist options
  • Save brimston3/27ebef93dc64d99d6085f86d4ade6a47 to your computer and use it in GitHub Desktop.
Save brimston3/27ebef93dc64d99d6085f86d4ade6a47 to your computer and use it in GitHub Desktop.
T-9 dict search
// g++ -Wall -std=c++14 -o treescan treescan.cpp && time ./treescan
// just goofin' around, searching for words in phone T-9 numbers.
#include <iostream>
#include <fstream>
#include <assert.h>
#include <string>
#include <array>
#include <algorithm>
#include <regex>
#include <limits>
struct dfs_ctx {
const char * pn; // the search string
int pnoff; // the offset from the original string that pn starts at
unsigned pnlen; // length of the search string
char * workbuf; // a work buffer for printing matches
};
class dnode {
public:
dnode();
virtual ~dnode();
bool add(const std::string & s, int linenum=1, int depth = 0);
int pn_dfs(const std::string & pn) const;
int pn_dfs_impl(struct dfs_ctx & dc, unsigned depth) const;
std::array<dnode*, 26> nl;
int complete;
} dnode_st;
dnode::dnode() :
nl{0,},
complete(0)
{}
dnode::~dnode()
{
for (auto & nlp : nl) {
if (nlp) {
delete nlp;
}
}
}
__attribute__((pure))
static int map_char_to_radix(const char c) {
assert(c >= 'a' && c <= 'z');
return c-'a';
}
bool dnode::add(const std::string & s, int linenum, int depth) {
assert(depth>=0);
if (s.length() == static_cast<unsigned>(depth)) {
complete = linenum;
return true;
}
dnode *& rn = nl[map_char_to_radix(s[depth])];
if (rn == nullptr) {
rn = new dnode();
if (!rn) {
return false;
}
}
return rn->add(s, linenum, depth+1);
}
const std::map<char, const std::string> anum_map = {
{'2', "abc"},
{'3', "def"},
{'4', "ghi"},
{'5', "jkl"},
{'6', "mno"},
{'7', "pqrs"},
{'8', "tuv"},
{'9', "wxyz"}
};
int dnode::pn_dfs(const std::string & pn) const {
char workbuf[pn.length()+1] = {0,};
if (pn.length() == 0) return 0;
int cnt = 0;
assert(pn.length() <= std::numeric_limits<unsigned>::max());
struct dfs_ctx dc = {
0, 0, static_cast<unsigned>(pn.length()), workbuf
};
for (unsigned i = 0; i < pn.length(); i++) {
dc.pn = pn.c_str()+i;
dc.pnoff = i;
cnt += pn_dfs_impl(dc, 0);
}
return cnt;
}
int dnode::pn_dfs_impl(struct dfs_ctx & dc, unsigned depth) const {
if (!!complete) {
std::cout << std::string{dc.workbuf, dc.workbuf + depth} << " @" << dc.pnoff
<< " [l#" << complete << "]" << std::endl;
}
int count = !!complete;
if (dc.pnlen == depth)
return count;
auto pos = anum_map.find(dc.pn[depth]);
if (pos == anum_map.end())
return count;
for (char c : pos->second) {
dc.workbuf[depth] = c;
const dnode * const & rn = nl[map_char_to_radix(c)];
if (rn == nullptr) continue;
count += rn->pn_dfs_impl(dc, depth+1);
}
return count;
}
static int load_dict(dnode & d, std::string filename) {
std::ifstream infile(filename);
std::string l;
std::regex r("^[a-zA-Z]+$");
unsigned total_added = 0;
unsigned linenum = 1; // should be at least 1
while(std::getline(infile, l)) {
if (regex_match(l,r)) {
std::transform(l.begin(), l.end(), l.begin(), ::tolower);
if (!d.add(l,linenum)) {
std::cerr << "Error adding word \"" << l << "\" to tree" << std::endl;
} else {
++total_added;
}
}
/*
else {
omit illegal string.
}
*/
++linenum;
}
std::cerr << total_added << " words added" << std::endl;
return total_added;
}
int main() {
dnode root;
load_dict(root, "/usr/share/dict/american-english");
int cnt = root.pn_dfs("3825633");
std::cout << "Found " << cnt << " matches" << std::endl;
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment