Created
December 29, 2018 03:09
-
-
Save GeeLaw/be3aec94a6ba7c3817ef2e16d261f616 to your computer and use it in GitHub Desktop.
Playing with generalized NFA. The program gets a positive number as input, and outputs a regular expression that matches positive decimal integer without leading zeros that is a multiple of the input number.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// gnfa.cpp | |
// Copyright 2018 by Gee Law. | |
// Licensed under the MIT license. | |
#include<cstdio> | |
#include<vector> | |
#include<map> | |
// #define DEBUG_ONLY(t) do { t; } while (false) | |
#define DEBUG_ONLY(t) do { } while (false) | |
template <typename TDerived, typename TBase> | |
bool is(TBase *baseptr) | |
{ | |
return dynamic_cast<TDerived *>(baseptr) != nullptr; | |
} | |
template <typename TDerived, typename TBase> | |
TDerived *as(TBase *baseptr) | |
{ | |
return dynamic_cast<TDerived *>(baseptr); | |
} | |
struct RegExp | |
{ | |
virtual ~RegExp() { } | |
virtual void FPrint(FILE *fp) = 0; | |
virtual RegExp *Clone() = 0; | |
protected: | |
RegExp() = default; | |
RegExp(RegExp const &) = delete; | |
RegExp(RegExp &&) = delete; | |
RegExp &operator = (RegExp const &) = delete; | |
RegExp &operator = (RegExp &&) = delete; | |
}; | |
struct Epsilon; | |
struct Character; | |
struct Concat; | |
struct Choice; | |
struct KleeneStar; | |
struct Epsilon : RegExp | |
{ | |
Epsilon() | |
{ | |
} | |
RegExp *Clone() | |
{ | |
return new Epsilon(); | |
} | |
~Epsilon() | |
{ | |
} | |
void FPrint(FILE *fp) | |
{ | |
std::fputc('(', fp); | |
std::fputc(')', fp); | |
} | |
}; | |
struct Character : RegExp | |
{ | |
Character(char c) | |
: ch(c) | |
{ | |
} | |
RegExp *Clone() | |
{ | |
return new Character(ch); | |
} | |
~Character() | |
{ | |
} | |
void FPrint(FILE *fp) | |
{ | |
if (ch == '\\' || ch == '|' || | |
ch == '(' || ch == ')' || | |
ch == '*') | |
{ | |
std::fputc('\\', fp); | |
} | |
std::fputc(ch, fp); | |
} | |
private: | |
char ch; | |
}; | |
struct Concat : RegExp | |
{ | |
Concat(RegExp *f, RegExp *s) | |
: first(f), second(s) | |
{ | |
} | |
RegExp *Clone() | |
{ | |
return new Concat(first->Clone(), second->Clone()); | |
} | |
~Concat() | |
{ | |
delete first; | |
delete second; | |
} | |
void FPrint(FILE *fp) | |
{ | |
auto const firstParen = is<Choice>(first); | |
auto const secondParen = is<Choice>(second); | |
if (firstParen) | |
{ | |
std::fputc('(', fp); | |
} | |
first->FPrint(fp); | |
if (firstParen) | |
{ | |
std::fputc(')', fp); | |
} | |
if (secondParen) | |
{ | |
std::fputc('(', fp); | |
} | |
second->FPrint(fp); | |
if (secondParen) | |
{ | |
std::fputc(')', fp); | |
} | |
} | |
private: | |
RegExp *first, *second; | |
}; | |
struct Choice : RegExp | |
{ | |
Choice(RegExp *f, RegExp *s) | |
: first(f), second(s) | |
{ | |
} | |
RegExp *Clone() | |
{ | |
return new Choice(first->Clone(), second->Clone()); | |
} | |
~Choice() | |
{ | |
delete first; | |
delete second; | |
} | |
void FPrint(FILE *fp) | |
{ | |
first->FPrint(fp); | |
std::fputc('|', fp); | |
second->FPrint(fp); | |
} | |
private: | |
RegExp *first, *second; | |
}; | |
struct KleeneStar : RegExp | |
{ | |
KleeneStar(RegExp *u) | |
: unit(u) | |
{ | |
} | |
RegExp *Clone() | |
{ | |
return new KleeneStar(unit->Clone()); | |
} | |
~KleeneStar() | |
{ | |
delete unit; | |
} | |
void FPrint(FILE *fp) | |
{ | |
auto needParen = !is<Character>(unit); | |
if (needParen) | |
{ | |
fputc('(', fp); | |
} | |
unit->FPrint(fp); | |
if (needParen) | |
{ | |
fputc(')', fp); | |
} | |
fputc('*', fp); | |
} | |
private: | |
RegExp *unit; | |
}; | |
// Factory constructors that take care of object ownership. | |
RegExp *MakeEpsilon() | |
{ | |
return new Epsilon(); | |
} | |
RegExp *MakeCharacter(char ch) | |
{ | |
return new Character(ch); | |
} | |
RegExp *MakeConcat(RegExp *first, RegExp *second) | |
{ | |
if (first == nullptr || second == nullptr) | |
{ | |
delete first; | |
delete second; | |
return nullptr; | |
} | |
if (is<Epsilon>(first)) | |
{ | |
delete first; | |
return second; | |
} | |
if (is<Epsilon>(second)) | |
{ | |
delete second; | |
return first; | |
} | |
return new Concat(first, second); | |
} | |
RegExp *MakeChoice(RegExp *first, RegExp *second) | |
{ | |
if (first == nullptr) | |
{ | |
return second; | |
} | |
if (second == nullptr) | |
{ | |
return first; | |
} | |
return new Choice(first, second); | |
} | |
RegExp *MakeKleeneStar(RegExp *unit) | |
{ | |
if (unit == nullptr) | |
{ | |
return MakeEpsilon(); | |
} | |
if (is<Epsilon>(unit) || is<KleeneStar>(unit)) | |
{ | |
return unit; | |
} | |
return new KleeneStar(unit); | |
} | |
RegExp *MakeClone(RegExp *rgx) | |
{ | |
return rgx == nullptr ? nullptr : rgx->Clone(); | |
} | |
// state 0 = initial | |
// state 1 = accept | |
typedef std::vector<std::map<size_t, RegExp *>> GNFA; | |
void ReduceGNFA(GNFA &graph) | |
{ | |
for (size_t sz = graph.size(); sz > 2; --sz) | |
{ | |
auto const t = sz - 1; | |
RegExp *ttStar = nullptr; | |
// Process the self-loop. | |
{ | |
RegExp *&tt = graph[t][t]; | |
tt = MakeKleeneStar(tt); | |
ttStar = tt; | |
} | |
for (size_t i = 0; i != t; ++i) | |
{ | |
RegExp *it = nullptr; | |
// Skip if there is no i->t. | |
{ | |
auto const find_it = graph[i].find(t); | |
if (find_it == graph[i].end()) | |
{ | |
continue; | |
} | |
it = find_it->second; | |
if (it == nullptr) | |
{ | |
continue; | |
} | |
} | |
for (size_t j = 0; j != t; ++j) | |
{ | |
RegExp *tj = nullptr; | |
// Skip if there is no t->j. | |
{ | |
auto const find_tj = graph[t].find(j); | |
if (find_tj == graph[t].end()) | |
{ | |
continue; | |
} | |
tj = find_tj->second; | |
if (tj == nullptr) | |
{ | |
continue; | |
} | |
} | |
RegExp *&ij = graph[i][j]; | |
DEBUG_ONLY( | |
{ | |
std::fprintf(stderr, "Merging: %zu -- %zu --> %zu\n\tit = ", i, t, j); | |
it->FPrint(stderr); | |
std::fputs("\n\t(tt)* = ", stderr); | |
ttStar->FPrint(stderr); | |
std::fputs("\n\ttj = ", stderr); | |
tj->FPrint(stderr); | |
std::fputs("\n\tij = ", stderr); | |
if (ij != nullptr) | |
{ | |
ij->FPrint(stderr); | |
} | |
else | |
{ | |
std::fputs("(non existent)", stderr); | |
} | |
}); | |
// Compose new i->j as it(tt)*tj|ij. | |
// ij is not cloned because their ownerships are taken care of. | |
// it, tt, tj must be cloned because they might be used later. | |
ij = MakeChoice( | |
MakeConcat(it->Clone(), MakeConcat(ttStar->Clone(), tj->Clone())), | |
ij | |
); | |
DEBUG_ONLY( | |
{ | |
std::fputs("\n\t => ", stderr); | |
ij->FPrint(stderr); | |
std::fputc('\n', stderr); | |
}); | |
} | |
} | |
// Clean up graph[t]. | |
for (auto const &kv : graph[t]) | |
{ | |
delete kv.second; | |
} | |
graph.pop_back(); | |
} | |
} | |
RegExp *GetRegex(GNFA &gnfa) | |
{ | |
ReduceGNFA(gnfa); | |
// The accepting expression is | |
// (00)*(01)((11)*(10)(00)*(01))*(11)*. | |
auto const r00 = gnfa[0][0]; | |
auto const r01 = gnfa[0][1]; | |
auto const r10 = gnfa[1][0]; | |
auto const r11 = gnfa[1][1]; | |
return MakeConcat( | |
// (00)*(01) | |
MakeConcat(MakeKleeneStar(MakeClone(r00)), MakeClone(r01)), | |
// ((11)*(10)(00)*(01))*(11)* | |
MakeConcat( | |
// ((11)*(10)(00)*(01))* | |
MakeKleeneStar(MakeConcat( | |
// (11)*(10) | |
MakeConcat(MakeKleeneStar(MakeClone(r11)), MakeClone(r10)), | |
// (00)*(01) | |
MakeConcat(MakeKleeneStar(MakeClone(r00)), MakeClone(r01)) | |
)), | |
// (11)* | |
MakeKleeneStar(MakeClone(r11)) | |
) | |
); | |
} | |
void DestoryGNFA(GNFA &gnfa) | |
{ | |
for (auto const &i : gnfa) | |
{ | |
for (auto const &j : i) | |
{ | |
delete j.second; | |
} | |
} | |
gnfa.clear(); | |
} | |
int main() | |
{ | |
size_t m; | |
scanf("%zu", &m); | |
GNFA gnfa; | |
gnfa.resize(m + 1); | |
// state i => ~ (i - 1) mod m | |
// Create initial --- first digit ---> states. | |
for (size_t dgt = 1; dgt != 10; ++dgt) | |
{ | |
const auto j = dgt % m; | |
RegExp *&sj = gnfa[0][j + 1]; | |
sj = MakeChoice(sj, MakeCharacter('0' + dgt)); | |
} | |
// Create states --- digit ---> states. | |
for (size_t i = 0; i != m; ++i) | |
{ | |
for (size_t dgt = 0; dgt != 10; ++dgt) | |
{ | |
auto const j = (i * 10 + dgt) % m; | |
RegExp *&ij = gnfa[i + 1][j + 1]; | |
ij = MakeChoice(ij, MakeCharacter('0' + dgt)); | |
} | |
} | |
DEBUG_ONLY( | |
{ | |
for (int i = 0; i <= m; ++i) | |
{ | |
for (auto const &j : gnfa[i]) | |
{ | |
std::fprintf(stderr, "%d --> %d: ", i, (int)j.first); | |
j.second->FPrint(stderr); | |
std::fputc('\n', stderr); | |
} | |
} | |
}); | |
auto const rgx = GetRegex(gnfa); | |
DestoryGNFA(gnfa); | |
rgx->FPrint(stdout); | |
delete rgx; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment