Skip to content

Instantly share code, notes, and snippets.

@brokenpylons
Created March 4, 2022 08:59
Show Gist options
  • Save brokenpylons/dc384033dd6fec179041a9c996e5eff8 to your computer and use it in GitHub Desktop.
Save brokenpylons/dc384033dd6fec179041a9c996e5eff8 to your computer and use it in GitHub Desktop.
import java.io.File
import java.io.InputStream
import java.util.LinkedList
const val EOF_SYMBOL = -1
const val ERROR_STATE = 0
const val SKIP_VALUE = 0
const val NEWLINE = '\n'.code
interface Automaton {
val states: Set<Int>
val alphabet: IntRange
fun next(state: Int, symbol: Int): Int
fun value(state: Int): Int
val startState: Int
val finalStates: Set<Int>
}
object Example : Automaton {
override val states = setOf(1, 2, 3, 4, 5)
override val alphabet = 0 .. 255
override val startState = 1
override val finalStates = setOf(3, 4, 5)
private val numberOfStates = states.maxOrNull()!! + 1
private val numberOfSymbols = alphabet.maxOrNull()!! + 1
private val transitions = Array(numberOfStates) {IntArray(numberOfSymbols)}
private val values: Array<Int> = Array(numberOfStates) {0}
private fun setTransition(from: Int, symbol: Char, to: Int) {
transitions[from][symbol.code] = to
}
private fun setValue(state: Int, terminal: Int) {
values[state] = terminal
}
override fun next(state: Int, symbol: Int): Int =
if (symbol == EOF_SYMBOL) ERROR_STATE
else {
assert(states.contains(state))
assert(alphabet.contains(symbol))
transitions[state][symbol]
}
override fun value(state: Int): Int {
assert(states.contains(state))
return values[state]
}
init {
setTransition(1, 'b', 2)
setTransition(2, 'a', 1)
setTransition(2, 'b', 2)
setTransition(2, 'c', 3)
setTransition(2, 'd', 4)
setTransition(2, 'e', 5)
setValue(3, 1);
setValue(5, 2);
}
}
data class Token(val value: Int, val lexeme: String, val startRow: Int, val startColumn: Int)
class Scanner(private val automaton: Automaton, private val stream: InputStream) {
private var state = automaton.startState
private var last: Int? = null
private var buffer = LinkedList<Byte>()
private var row = 1
private var column = 1
private fun updatePosition(symbol: Int) {
if (symbol == NEWLINE) {
row += 1
column = 1
} else {
column += 1
}
}
private fun getValue(): Int {
var symbol = last ?: stream.read()
state = automaton.startState
while (true) {
updatePosition(symbol)
val nextState = automaton.next(state, symbol)
if (nextState == ERROR_STATE) {
if (automaton.finalStates.contains(state)) {
last = symbol
return automaton.value(state)
} else throw Error("Invalid pattern at ${row}:${column}")
}
state = nextState
buffer.add(symbol.toByte())
symbol = stream.read()
}
}
fun eof(): Boolean =
last == EOF_SYMBOL
fun getToken(): Token? {
if (eof()) return null
val startRow = row
val startColumn = column
buffer.clear()
val value = getValue()
return if (value == SKIP_VALUE)
getToken()
else
Token(value, String(buffer.toByteArray()), startRow, startColumn)
}
}
fun name(value: Int) =
when (value) {
1 -> "C"
2 -> "E"
else -> throw Error("Invalid value")
}
fun printTokens(scanner: Scanner) {
val token = scanner.getToken()
if (token != null) {
print("${name(token.value)}(\"${token.lexeme}\") ")
printTokens(scanner)
}
}
fun main(args: Array<String>) {
val scanner = Scanner(Example, "bcbabbbcbabababababc".byteInputStream())
printTokens(scanner)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment