Created
March 4, 2022 08:59
-
-
Save brokenpylons/dc384033dd6fec179041a9c996e5eff8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.File | |
import java.io.InputStream | |
import java.util.LinkedList | |
const val EOF_SYMBOL = -1 | |
const val ERROR_STATE = 0 | |
const val SKIP_VALUE = 0 | |
const val NEWLINE = '\n'.code | |
interface Automaton { | |
val states: Set<Int> | |
val alphabet: IntRange | |
fun next(state: Int, symbol: Int): Int | |
fun value(state: Int): Int | |
val startState: Int | |
val finalStates: Set<Int> | |
} | |
object Example : Automaton { | |
override val states = setOf(1, 2, 3, 4, 5) | |
override val alphabet = 0 .. 255 | |
override val startState = 1 | |
override val finalStates = setOf(3, 4, 5) | |
private val numberOfStates = states.maxOrNull()!! + 1 | |
private val numberOfSymbols = alphabet.maxOrNull()!! + 1 | |
private val transitions = Array(numberOfStates) {IntArray(numberOfSymbols)} | |
private val values: Array<Int> = Array(numberOfStates) {0} | |
private fun setTransition(from: Int, symbol: Char, to: Int) { | |
transitions[from][symbol.code] = to | |
} | |
private fun setValue(state: Int, terminal: Int) { | |
values[state] = terminal | |
} | |
override fun next(state: Int, symbol: Int): Int = | |
if (symbol == EOF_SYMBOL) ERROR_STATE | |
else { | |
assert(states.contains(state)) | |
assert(alphabet.contains(symbol)) | |
transitions[state][symbol] | |
} | |
override fun value(state: Int): Int { | |
assert(states.contains(state)) | |
return values[state] | |
} | |
init { | |
setTransition(1, 'b', 2) | |
setTransition(2, 'a', 1) | |
setTransition(2, 'b', 2) | |
setTransition(2, 'c', 3) | |
setTransition(2, 'd', 4) | |
setTransition(2, 'e', 5) | |
setValue(3, 1); | |
setValue(5, 2); | |
} | |
} | |
data class Token(val value: Int, val lexeme: String, val startRow: Int, val startColumn: Int) | |
class Scanner(private val automaton: Automaton, private val stream: InputStream) { | |
private var state = automaton.startState | |
private var last: Int? = null | |
private var buffer = LinkedList<Byte>() | |
private var row = 1 | |
private var column = 1 | |
private fun updatePosition(symbol: Int) { | |
if (symbol == NEWLINE) { | |
row += 1 | |
column = 1 | |
} else { | |
column += 1 | |
} | |
} | |
private fun getValue(): Int { | |
var symbol = last ?: stream.read() | |
state = automaton.startState | |
while (true) { | |
updatePosition(symbol) | |
val nextState = automaton.next(state, symbol) | |
if (nextState == ERROR_STATE) { | |
if (automaton.finalStates.contains(state)) { | |
last = symbol | |
return automaton.value(state) | |
} else throw Error("Invalid pattern at ${row}:${column}") | |
} | |
state = nextState | |
buffer.add(symbol.toByte()) | |
symbol = stream.read() | |
} | |
} | |
fun eof(): Boolean = | |
last == EOF_SYMBOL | |
fun getToken(): Token? { | |
if (eof()) return null | |
val startRow = row | |
val startColumn = column | |
buffer.clear() | |
val value = getValue() | |
return if (value == SKIP_VALUE) | |
getToken() | |
else | |
Token(value, String(buffer.toByteArray()), startRow, startColumn) | |
} | |
} | |
fun name(value: Int) = | |
when (value) { | |
1 -> "C" | |
2 -> "E" | |
else -> throw Error("Invalid value") | |
} | |
fun printTokens(scanner: Scanner) { | |
val token = scanner.getToken() | |
if (token != null) { | |
print("${name(token.value)}(\"${token.lexeme}\") ") | |
printTokens(scanner) | |
} | |
} | |
fun main(args: Array<String>) { | |
val scanner = Scanner(Example, "bcbabbbcbabababababc".byteInputStream()) | |
printTokens(scanner) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment