Last active
December 22, 2015 23:49
-
-
Save edobashira/6549668 to your computer and use it in GitHub Desktop.
PhiMatcher skip matching
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//Compile something like this | |
//g++ main.cc -o main -O2 -lfst | |
#include <iostream> | |
#include <fst/compose.h> | |
#include <fst/vector-fst.h> | |
using namespace std; | |
using namespace fst; | |
template<class Arc> | |
void BuildChain(const string& str, const SymbolTable& syms, VectorFst<Arc>* fst, typename Arc::Label phi = -1) { | |
typedef typename Arc::StateId S; | |
typedef typename Arc::Label L; | |
typedef typename Arc::Weight W; | |
vector<string> fields; | |
fst->DeleteStates(); | |
S s = fst->AddState(); | |
fst->SetStart(s); | |
for (int i = 0; i != str.size(); ++i) { | |
S d = fst->AddState(); | |
L l = syms.Find(fields[i]); | |
fst->AddArc(s, Arc(l, l, W::One(), d)); | |
} | |
} | |
template<class Arc> | |
void BuildLinearChainAutomata(const string& str, VectorFst<Arc>* ofst, SymbolTable* syms, typename Arc::Label phi = -1) { | |
typedef typename Arc::Label L; | |
typedef typename Arc::StateId S; | |
typedef typename Arc::Weight W; | |
char* cstr = new char[str.size() + 1]; | |
strcpy(cstr, str.c_str()); | |
vector<char*> fields; | |
SplitToVector(cstr, " ", &fields, true); | |
S s = ofst->AddState(); | |
ofst->SetStart(s); | |
for (size_t i = 0; i != fields.size(); ++i) { | |
S d = ofst->AddState(); | |
L l = syms->AddSymbol(fields[i]); | |
ofst->AddArc(s, Arc(l, l, W::One(), d)); | |
s = d; | |
} | |
S d = ofst->AddState(); | |
ofst->AddArc(s, Arc(0, 0, W::One(), d)); | |
s = d; | |
ofst->SetFinal(s, W::One()); | |
if (phi != -1) | |
for (S s = 0; s != ofst->NumStates(); ++s) | |
ofst->AddArc(s, Arc(phi, phi, W::One(), s)); | |
delete[] cstr; | |
} | |
template<class Arc> | |
bool PhiCompose(const Fst<Arc> &fst1, | |
const Fst<Arc> &fst2, | |
typename Arc::Label phi, | |
MutableFst<Arc> *ofst) { | |
typedef Fst<Arc> F; | |
typedef PhiMatcher<SortedMatcher<F> > M; | |
ComposeFstOptions<Arc, M> copts; | |
M *matcher = new M(fst1, MATCH_OUTPUT, phi); | |
copts.matcher1 = matcher; | |
*ofst = ComposeFst<Arc>(fst1, fst2, copts); | |
Connect(ofst); | |
return ofst->NumStates() > 0; | |
} | |
int main(int argc, char** argv) { | |
SymbolTable syms("syms"); | |
syms.AddSymbol("<eps>", 0); | |
StdArc::Label phi = syms.AddSymbol("<phi>"); | |
ifstream ifs(argv[1]); | |
if (!ifs.is_open()) | |
LOG(ERROR) << "Could not open file : " << argv[1]; | |
string s; | |
getline(ifs, s); | |
StdVectorFst needle; | |
BuildLinearChainAutomata(s, &needle, &syms, phi); | |
ArcSort(&needle, OLabelCompare<StdArc>()); | |
for (; getline(ifs, s); ) { | |
StdVectorFst haystack; | |
BuildLinearChainAutomata(s, &haystack, &syms); | |
StdVectorFst res; | |
if (PhiCompose(needle, haystack, phi, &res)) { | |
cout << s << endl; | |
} | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment