-
-
Save JarnaChao09/24ec66e1f4e13d96a2452977dc193851 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
enum class TokenType { | |
ID, | |
EQ, | |
OR, | |
AC, | |
AS, | |
NL, | |
} | |
data class Token(val value: String, val type: TokenType) | |
sealed interface Grammar { | |
var next: Grammar? | |
} | |
data class Ident(val name: String, override var next: Grammar? = null) : Grammar { | |
override fun toString(): String = "$name${next?.toString() ?: ""}" | |
} | |
data class AlphabetCharacter(val character: String, override var next: Grammar? = null) : Grammar { | |
override fun toString(): String = "$character${next?.toString() ?: ""}" | |
} | |
data class AcceptingState(override var next: Grammar? = null) : Grammar { | |
override fun toString(): String = "AcceptingState${next?.toString() ?: ""}" | |
} | |
data class Rule(val identifier: Grammar, val rule: List<Grammar>, override var next: Grammar? = null) : Grammar { | |
override fun toString(): String = "$identifier = ${rule.joinToString(separator=" | ")}${if (next != null) { | |
"\n${next.toString()}" | |
} else { | |
"" | |
}}" | |
} | |
fun lex(input: String, alphabet: Set<Char>, acceptingState: String): List<Token> { | |
return buildList { | |
var i = 0 | |
while (i < input.length) { | |
val c = input[i] | |
when(c) { | |
' ' -> {} | |
in alphabet -> add(Token("$c", TokenType.AC)) | |
'\n' -> add(Token("\\n", TokenType.NL)) | |
'|' -> add(Token("$c", TokenType.OR)) | |
'=' -> add(Token("$c", TokenType.EQ)) | |
else -> { | |
val s = buildString { | |
append(c) | |
i++ | |
while (i < input.length && ( | |
input[i] !in alphabet && input[i] != '\n' && input[i] != '|' && input[i] != '=' && input[i] != ' ' | |
)) { | |
append(input[i++]) | |
} | |
i-- | |
} | |
add(Token(s, if (s == acceptingState) { | |
TokenType.AS | |
} else { | |
TokenType.ID | |
})) | |
} | |
} | |
i++ | |
} | |
} | |
} | |
data class ParserState(val tokens: List<Token>, val index: Int) { | |
val value: Token? | |
get() = if (index < tokens.size) { tokens[index] } else { null } | |
val nextState: ParserState | |
get() = ParserState(tokens, index + 1) | |
} | |
fun atom(state: ParserState): Pair<Grammar, ParserState> { | |
var currentState = state | |
var ret: Grammar? = null | |
var curr: Grammar? = null | |
while ((currentState.value?.type?.equals(TokenType.ID) == true) || | |
(currentState.value?.type?.equals(TokenType.AC) == true) || | |
(currentState.value?.type?.equals(TokenType.AS) == true)) { | |
val value = currentState.value!! | |
when (value.type) { | |
TokenType.ID -> { | |
if (ret == null) { | |
ret = Ident(value.value) | |
curr = ret | |
} else { | |
curr!!.next = Ident(value.value) | |
curr = curr.next | |
} | |
} | |
TokenType.AC -> { | |
if (ret == null) { | |
ret = AlphabetCharacter(value.value) | |
curr = ret | |
} else { | |
curr!!.next = AlphabetCharacter(value.value) | |
curr = curr.next | |
} | |
} | |
TokenType.AS -> { | |
if (ret == null) { | |
ret = AcceptingState() | |
curr = ret | |
} else { | |
curr!!.next = AcceptingState() | |
curr = curr.next | |
} | |
} | |
else -> { | |
error("error state 1") | |
} | |
} | |
currentState = currentState.nextState | |
} | |
return (ret ?: error("error state 2")) to currentState | |
} | |
fun rule(state: ParserState): Pair<Grammar, ParserState> { | |
var (expr: Grammar, currentState) = atom(state) | |
if (currentState.value?.type?.equals(TokenType.EQ) == true) { | |
val rules = buildList { | |
do { | |
currentState = currentState.nextState | |
val (c, s) = atom(currentState) | |
add(c) | |
currentState = s | |
} while (currentState.value?.type?.equals(TokenType.OR) == true) | |
} | |
expr = Rule(expr, rule=rules) | |
} | |
return expr to currentState | |
} | |
fun parse(input: List<Token>): Grammar { | |
val state = ParserState(input, -1) | |
var ret: Grammar? = null | |
var curr: Grammar? = null | |
var currentState = state | |
do { | |
currentState = currentState.nextState | |
val (tr, ts) = rule(currentState) | |
currentState = ts | |
if (ret == null) { | |
ret = tr | |
curr = ret | |
} else { | |
curr!!.next = tr | |
curr = curr!!.next // !! IS ONLY NEEDED FOR KOTLIN KERNEL | |
} | |
} while (currentState.value?.type?.equals(TokenType.NL) == true) | |
return ret!! | |
} | |
data class NFANode(val stateID: String, val transitions: Map<String, String>, val lambdaTransitions: Set<String> = setOf()) | |
class NFA(val startState: String, val acceptStates: Set<String>, val nfaNodes: Map<String, NFANode>) { | |
fun accepts(value: String): Boolean { | |
var currentStates = setOf(startState to 0) | |
val finishedStates = mutableSetOf<Pair<String, Int>>() | |
while (currentStates.isNotEmpty()) { | |
currentStates = buildSet { | |
for ((currentState, currentIndex) in currentStates) { | |
if (currentIndex < value.length) { | |
val node = nfaNodes[currentState]!! | |
for ((transitionString, destinationNode) in node.transitions) { | |
if (currentIndex + transitionString.length <= value.length && value.substring(currentIndex until (currentIndex + transitionString.length)) == transitionString) { | |
add(destinationNode to currentIndex + transitionString.length) | |
} | |
} | |
addAll(node.lambdaTransitions.map { it to currentIndex }) | |
} else { | |
finishedStates.add(currentState to currentIndex) | |
} | |
} | |
} | |
} | |
return finishedStates.any { (s, _) -> s in acceptStates } | |
} | |
override fun toString(): String { | |
return buildString { | |
append("Start State: ${this@NFA.startState} | Accept States: ${this@NFA.acceptStates}") | |
append('\n') | |
for ((lhs, node) in this@NFA.nfaNodes) { | |
append(" $lhs -> $node") | |
append('\n') | |
} | |
} | |
} | |
} | |
fun grammar2NFA(grammar: Grammar, startState: String, defaultAcceptState: String="X"): NFA { | |
var current: Grammar? = grammar | |
val acceptingStateSet = mutableSetOf(defaultAcceptState) | |
val nfaMap = buildMap { | |
put(defaultAcceptState, NFANode(defaultAcceptState, emptyMap(), emptySet())) | |
while (current != null) { | |
val currentLambdaTransitions = mutableSetOf<String>() | |
val currentTransitions = mutableMapOf<String, String>() | |
val c = current | |
current = current?.next | |
when (c) { | |
is Rule -> { | |
val currentState = when (val id = c.identifier) { | |
is Ident -> id.name | |
else -> error("lhs of a = must be an identifier") | |
} | |
for (g in c.rule) { | |
when (g) { | |
is AcceptingState -> acceptingStateSet.add(currentState) | |
is Ident -> currentLambdaTransitions.add(g.name) | |
is AlphabetCharacter -> { | |
var currentChar: Grammar? = g | |
var sum = "" | |
while (currentChar != null) { | |
when (currentChar) { | |
is AlphabetCharacter -> { | |
sum += currentChar.character | |
currentChar = currentChar.next | |
if (currentChar == null) { | |
currentTransitions[sum] = defaultAcceptState | |
} | |
} | |
is Ident -> { | |
currentTransitions[sum] = currentChar.name | |
currentChar = currentChar.next | |
} | |
else -> error("cannot nest rule") | |
} | |
} | |
} | |
is Rule -> error("cannot nest rule") | |
else -> error("needed to satisfy kotlin kernel") // ERROR ONLY ON KOTLIN KERNEL | |
} | |
put(currentState, NFANode(currentState, currentTransitions, currentLambdaTransitions)) | |
} | |
} | |
else -> error("top level should be rule") | |
} | |
} | |
} | |
return NFA(startState, acceptingStateSet, nfaMap) | |
} | |
fun enumerate(size: Int, alphabet: Set<Char>): Sequence<String> { | |
return sequence { | |
var prev = listOf("") | |
repeat(size) { | |
prev = buildList { | |
for (str in prev) { | |
for (alphabetCharacter in alphabet) { | |
val tmp = str + alphabetCharacter | |
add(tmp) | |
yield(tmp) | |
} | |
} | |
} | |
} | |
} | |
} | |
fun wrap(title: String, separator: String = "-", n: Int = 20, block: () -> Unit): Unit { | |
println(title) | |
repeat(n) { print(separator) } | |
println() | |
block() | |
repeat(n) { print(separator) } | |
println() | |
} | |
// =========== cell 2 ============= | |
val alphabet = setOf( | |
'0', '1' | |
) | |
val startState = "S" | |
val acceptState = "x" | |
val grammar = """ | |
S = 0S | 1A | |
A = 1S | 0B | |
B = x | C | |
C = 0000 | S | |
""".trimIndent() | |
val n = 6 | |
val tokens = lex(grammar, alphabet, acceptState) | |
val tree = parse(tokens) | |
wrap("Parsed Grammar") { | |
println(tree) | |
} | |
val nfa = grammar2NFA(tree, startState) | |
wrap("Compiled NFA") { | |
println(nfa) | |
} | |
val (accepted, rejected) = enumerate(n, alphabet).partition { | |
nfa.accepts(it) | |
} | |
wrap("Accepted Strings") { | |
accepted.forEach(::println) | |
} | |
wrap("Rejected Strings") { | |
rejected.forEach(::println) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment