Skip to content

Instantly share code, notes, and snippets.

@edobashira
Last active December 22, 2015 23:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save edobashira/6549668 to your computer and use it in GitHub Desktop.
Save edobashira/6549668 to your computer and use it in GitHub Desktop.
PhiMatcher skip matching
//Compile something like this
//g++ main.cc -o main -O2 -lfst
#include <iostream>
#include <fst/compose.h>
#include <fst/vector-fst.h>
using namespace std;
using namespace fst;
template<class Arc>
void BuildChain(const string& str, const SymbolTable& syms, VectorFst<Arc>* fst, typename Arc::Label phi = -1) {
typedef typename Arc::StateId S;
typedef typename Arc::Label L;
typedef typename Arc::Weight W;
vector<string> fields;
fst->DeleteStates();
S s = fst->AddState();
fst->SetStart(s);
for (int i = 0; i != str.size(); ++i) {
S d = fst->AddState();
L l = syms.Find(fields[i]);
fst->AddArc(s, Arc(l, l, W::One(), d));
}
}
template<class Arc>
void BuildLinearChainAutomata(const string& str, VectorFst<Arc>* ofst, SymbolTable* syms, typename Arc::Label phi = -1) {
typedef typename Arc::Label L;
typedef typename Arc::StateId S;
typedef typename Arc::Weight W;
char* cstr = new char[str.size() + 1];
strcpy(cstr, str.c_str());
vector<char*> fields;
SplitToVector(cstr, " ", &fields, true);
S s = ofst->AddState();
ofst->SetStart(s);
for (size_t i = 0; i != fields.size(); ++i) {
S d = ofst->AddState();
L l = syms->AddSymbol(fields[i]);
ofst->AddArc(s, Arc(l, l, W::One(), d));
s = d;
}
S d = ofst->AddState();
ofst->AddArc(s, Arc(0, 0, W::One(), d));
s = d;
ofst->SetFinal(s, W::One());
if (phi != -1)
for (S s = 0; s != ofst->NumStates(); ++s)
ofst->AddArc(s, Arc(phi, phi, W::One(), s));
delete[] cstr;
}
template<class Arc>
bool PhiCompose(const Fst<Arc> &fst1,
const Fst<Arc> &fst2,
typename Arc::Label phi,
MutableFst<Arc> *ofst) {
typedef Fst<Arc> F;
typedef PhiMatcher<SortedMatcher<F> > M;
ComposeFstOptions<Arc, M> copts;
M *matcher = new M(fst1, MATCH_OUTPUT, phi);
copts.matcher1 = matcher;
*ofst = ComposeFst<Arc>(fst1, fst2, copts);
Connect(ofst);
return ofst->NumStates() > 0;
}
int main(int argc, char** argv) {
SymbolTable syms("syms");
syms.AddSymbol("<eps>", 0);
StdArc::Label phi = syms.AddSymbol("<phi>");
ifstream ifs(argv[1]);
if (!ifs.is_open())
LOG(ERROR) << "Could not open file : " << argv[1];
string s;
getline(ifs, s);
StdVectorFst needle;
BuildLinearChainAutomata(s, &needle, &syms, phi);
ArcSort(&needle, OLabelCompare<StdArc>());
for (; getline(ifs, s); ) {
StdVectorFst haystack;
BuildLinearChainAutomata(s, &haystack, &syms);
StdVectorFst res;
if (PhiCompose(needle, haystack, phi, &res)) {
cout << s << endl;
}
}
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment