Skip to content

Instantly share code, notes, and snippets.

@GeeLaw GeeLaw/gnfa.cpp
Created Dec 29, 2018

Embed
What would you like to do?
Playing with generalized NFA. The program gets a positive number as input, and outputs a regular expression that matches positive decimal integer without leading zeros that is a multiple of the input number.
// gnfa.cpp
// Copyright 2018 by Gee Law.
// Licensed under the MIT license.
#include<cstdio>
#include<vector>
#include<map>
// #define DEBUG_ONLY(t) do { t; } while (false)
#define DEBUG_ONLY(t) do { } while (false)
template <typename TDerived, typename TBase>
bool is(TBase *baseptr)
{
return dynamic_cast<TDerived *>(baseptr) != nullptr;
}
template <typename TDerived, typename TBase>
TDerived *as(TBase *baseptr)
{
return dynamic_cast<TDerived *>(baseptr);
}
struct RegExp
{
virtual ~RegExp() { }
virtual void FPrint(FILE *fp) = 0;
virtual RegExp *Clone() = 0;
protected:
RegExp() = default;
RegExp(RegExp const &) = delete;
RegExp(RegExp &&) = delete;
RegExp &operator = (RegExp const &) = delete;
RegExp &operator = (RegExp &&) = delete;
};
struct Epsilon;
struct Character;
struct Concat;
struct Choice;
struct KleeneStar;
struct Epsilon : RegExp
{
Epsilon()
{
}
RegExp *Clone()
{
return new Epsilon();
}
~Epsilon()
{
}
void FPrint(FILE *fp)
{
std::fputc('(', fp);
std::fputc(')', fp);
}
};
struct Character : RegExp
{
Character(char c)
: ch(c)
{
}
RegExp *Clone()
{
return new Character(ch);
}
~Character()
{
}
void FPrint(FILE *fp)
{
if (ch == '\\' || ch == '|' ||
ch == '(' || ch == ')' ||
ch == '*')
{
std::fputc('\\', fp);
}
std::fputc(ch, fp);
}
private:
char ch;
};
struct Concat : RegExp
{
Concat(RegExp *f, RegExp *s)
: first(f), second(s)
{
}
RegExp *Clone()
{
return new Concat(first->Clone(), second->Clone());
}
~Concat()
{
delete first;
delete second;
}
void FPrint(FILE *fp)
{
auto const firstParen = is<Choice>(first);
auto const secondParen = is<Choice>(second);
if (firstParen)
{
std::fputc('(', fp);
}
first->FPrint(fp);
if (firstParen)
{
std::fputc(')', fp);
}
if (secondParen)
{
std::fputc('(', fp);
}
second->FPrint(fp);
if (secondParen)
{
std::fputc(')', fp);
}
}
private:
RegExp *first, *second;
};
struct Choice : RegExp
{
Choice(RegExp *f, RegExp *s)
: first(f), second(s)
{
}
RegExp *Clone()
{
return new Choice(first->Clone(), second->Clone());
}
~Choice()
{
delete first;
delete second;
}
void FPrint(FILE *fp)
{
first->FPrint(fp);
std::fputc('|', fp);
second->FPrint(fp);
}
private:
RegExp *first, *second;
};
struct KleeneStar : RegExp
{
KleeneStar(RegExp *u)
: unit(u)
{
}
RegExp *Clone()
{
return new KleeneStar(unit->Clone());
}
~KleeneStar()
{
delete unit;
}
void FPrint(FILE *fp)
{
auto needParen = !is<Character>(unit);
if (needParen)
{
fputc('(', fp);
}
unit->FPrint(fp);
if (needParen)
{
fputc(')', fp);
}
fputc('*', fp);
}
private:
RegExp *unit;
};
// Factory constructors that take care of object ownership.
RegExp *MakeEpsilon()
{
return new Epsilon();
}
RegExp *MakeCharacter(char ch)
{
return new Character(ch);
}
RegExp *MakeConcat(RegExp *first, RegExp *second)
{
if (first == nullptr || second == nullptr)
{
delete first;
delete second;
return nullptr;
}
if (is<Epsilon>(first))
{
delete first;
return second;
}
if (is<Epsilon>(second))
{
delete second;
return first;
}
return new Concat(first, second);
}
RegExp *MakeChoice(RegExp *first, RegExp *second)
{
if (first == nullptr)
{
return second;
}
if (second == nullptr)
{
return first;
}
return new Choice(first, second);
}
RegExp *MakeKleeneStar(RegExp *unit)
{
if (unit == nullptr)
{
return MakeEpsilon();
}
if (is<Epsilon>(unit) || is<KleeneStar>(unit))
{
return unit;
}
return new KleeneStar(unit);
}
RegExp *MakeClone(RegExp *rgx)
{
return rgx == nullptr ? nullptr : rgx->Clone();
}
// state 0 = initial
// state 1 = accept
typedef std::vector<std::map<size_t, RegExp *>> GNFA;
void ReduceGNFA(GNFA &graph)
{
for (size_t sz = graph.size(); sz > 2; --sz)
{
auto const t = sz - 1;
RegExp *ttStar = nullptr;
// Process the self-loop.
{
RegExp *&tt = graph[t][t];
tt = MakeKleeneStar(tt);
ttStar = tt;
}
for (size_t i = 0; i != t; ++i)
{
RegExp *it = nullptr;
// Skip if there is no i->t.
{
auto const find_it = graph[i].find(t);
if (find_it == graph[i].end())
{
continue;
}
it = find_it->second;
if (it == nullptr)
{
continue;
}
}
for (size_t j = 0; j != t; ++j)
{
RegExp *tj = nullptr;
// Skip if there is no t->j.
{
auto const find_tj = graph[t].find(j);
if (find_tj == graph[t].end())
{
continue;
}
tj = find_tj->second;
if (tj == nullptr)
{
continue;
}
}
RegExp *&ij = graph[i][j];
DEBUG_ONLY(
{
std::fprintf(stderr, "Merging: %zu -- %zu --> %zu\n\tit = ", i, t, j);
it->FPrint(stderr);
std::fputs("\n\t(tt)* = ", stderr);
ttStar->FPrint(stderr);
std::fputs("\n\ttj = ", stderr);
tj->FPrint(stderr);
std::fputs("\n\tij = ", stderr);
if (ij != nullptr)
{
ij->FPrint(stderr);
}
else
{
std::fputs("(non existent)", stderr);
}
});
// Compose new i->j as it(tt)*tj|ij.
// ij is not cloned because their ownerships are taken care of.
// it, tt, tj must be cloned because they might be used later.
ij = MakeChoice(
MakeConcat(it->Clone(), MakeConcat(ttStar->Clone(), tj->Clone())),
ij
);
DEBUG_ONLY(
{
std::fputs("\n\t => ", stderr);
ij->FPrint(stderr);
std::fputc('\n', stderr);
});
}
}
// Clean up graph[t].
for (auto const &kv : graph[t])
{
delete kv.second;
}
graph.pop_back();
}
}
RegExp *GetRegex(GNFA &gnfa)
{
ReduceGNFA(gnfa);
// The accepting expression is
// (00)*(01)((11)*(10)(00)*(01))*(11)*.
auto const r00 = gnfa[0][0];
auto const r01 = gnfa[0][1];
auto const r10 = gnfa[1][0];
auto const r11 = gnfa[1][1];
return MakeConcat(
// (00)*(01)
MakeConcat(MakeKleeneStar(MakeClone(r00)), MakeClone(r01)),
// ((11)*(10)(00)*(01))*(11)*
MakeConcat(
// ((11)*(10)(00)*(01))*
MakeKleeneStar(MakeConcat(
// (11)*(10)
MakeConcat(MakeKleeneStar(MakeClone(r11)), MakeClone(r10)),
// (00)*(01)
MakeConcat(MakeKleeneStar(MakeClone(r00)), MakeClone(r01))
)),
// (11)*
MakeKleeneStar(MakeClone(r11))
)
);
}
void DestoryGNFA(GNFA &gnfa)
{
for (auto const &i : gnfa)
{
for (auto const &j : i)
{
delete j.second;
}
}
gnfa.clear();
}
int main()
{
size_t m;
scanf("%zu", &m);
GNFA gnfa;
gnfa.resize(m + 1);
// state i => ~ (i - 1) mod m
// Create initial --- first digit ---> states.
for (size_t dgt = 1; dgt != 10; ++dgt)
{
const auto j = dgt % m;
RegExp *&sj = gnfa[0][j + 1];
sj = MakeChoice(sj, MakeCharacter('0' + dgt));
}
// Create states --- digit ---> states.
for (size_t i = 0; i != m; ++i)
{
for (size_t dgt = 0; dgt != 10; ++dgt)
{
auto const j = (i * 10 + dgt) % m;
RegExp *&ij = gnfa[i + 1][j + 1];
ij = MakeChoice(ij, MakeCharacter('0' + dgt));
}
}
DEBUG_ONLY(
{
for (int i = 0; i <= m; ++i)
{
for (auto const &j : gnfa[i])
{
std::fprintf(stderr, "%d --> %d: ", i, (int)j.first);
j.second->FPrint(stderr);
std::fputc('\n', stderr);
}
}
});
auto const rgx = GetRegex(gnfa);
DestoryGNFA(gnfa);
rgx->FPrint(stdout);
delete rgx;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.