Skip to content

Instantly share code, notes, and snippets.

@brokenpylons
Last active March 6, 2024 14:02
Show Gist options
  • Save brokenpylons/71fdd4f07aed8e3f212250946a00d3bf to your computer and use it in GitHub Desktop.
Save brokenpylons/71fdd4f07aed8e3f212250946a00d3bf to your computer and use it in GitHub Desktop.
import java.io.InputStream
import java.io.OutputStream
const val ERROR_STATE = 0
enum class Symbol {
EOF,
SKIP,
FOR,
FOREACH,
FFF
}
const val EOF = -1
const val NEWLINE = '\n'.code
interface DFA {
val states: Set<Int>
val alphabet: IntRange
fun next(state: Int, code: Int): Int
fun symbol(state: Int): Symbol
val startState: Int
val finalStates: Set<Int>
}
object ForForeachFFFAutomaton: DFA {
override val states = (1 .. 11).toSet()
override val alphabet = 0 .. 255
override val startState = 1
override val finalStates = setOf(2, 3, 5, 9, 10, 11)
private val numberOfStates = states.max() + 1 // plus the ERROR_STATE
private val numberOfCodes = alphabet.max() + 1 // plus the EOF
private val transitions = Array(numberOfStates) {IntArray(numberOfCodes)}
private val values = Array(numberOfStates) {Symbol.SKIP}
private fun setTransition(from: Int, chr: Char, to: Int) {
transitions[from][chr.code + 1] = to // + 1 because EOF is -1 and the array starts at 0
}
private fun setTransition(from: Int, code: Int, to: Int) {
transitions[from][code + 1] = to
}
private fun setSymbol(state: Int, symbol: Symbol) {
values[state] = symbol
}
override fun next(state: Int, code: Int): Int {
assert(states.contains(state))
assert(alphabet.contains(code))
return transitions[state][code + 1]
}
override fun symbol(state: Int): Symbol {
assert(states.contains(state))
return values[state]
}
init {
setTransition(1, 'f', 2)
setTransition(2, 'f', 3)
setTransition(3, 'f', 3)
setTransition(2, 'o', 4)
setTransition(4, 'r', 5)
setTransition(5, 'e', 6)
setTransition(6, 'a', 7)
setTransition(7, 'c', 8)
setTransition(8, 'h', 9)
setTransition(1, ' ', 10)
setTransition(1, EOF, 11)
setSymbol(2, Symbol.FFF)
setSymbol(3, Symbol.FFF)
setSymbol(5, Symbol.FOR)
setSymbol(9, Symbol.FOREACH)
setSymbol(11, Symbol.EOF)
}
}
data class Token(val symbol: Symbol, val lexeme: String, val startRow: Int, val startColumn: Int)
class Scanner(private val automaton: DFA, private val stream: InputStream) {
private var last: Int? = null
private var row = 1
private var column = 1
private fun updatePosition(code: Int) {
if (code == NEWLINE) {
row += 1
column = 1
} else {
column += 1
}
}
fun getToken(): Token {
val startRow = row
val startColumn = column
val buffer = mutableListOf<Char>()
var code = last ?: stream.read()
var state = automaton.startState
while (true) {
val nextState = automaton.next(state, code)
if (nextState == ERROR_STATE) break // Longest match
state = nextState
updatePosition(code)
buffer.add(code.toChar())
code = stream.read()
}
last = code // The code following the current lexeme is the first code of the next lexeme
if (automaton.finalStates.contains(state)) {
val symbol = automaton.symbol(state)
return if (symbol == Symbol.SKIP) {
getToken()
} else {
val lexeme = String(buffer.toCharArray())
Token(symbol, lexeme, startRow, startColumn)
}
} else {
throw Error("Invalid pattern at ${row}:${column}")
}
}
}
fun name(symbol: Symbol) =
when (symbol) {
Symbol.FOR -> "for"
Symbol.FOREACH -> "foreach"
Symbol.FFF -> "fff"
else -> throw Error("Invalid symbol")
}
fun printTokens(scanner: Scanner, output: OutputStream) {
val writer = output.writer(Charsets.UTF_8)
var token = scanner.getToken()
while (token.symbol != Symbol.EOF) {
writer.append("${name(token.symbol)}(\"${token.lexeme}\") ") // The output ends with a space!
token = scanner.getToken()
}
writer.appendLine()
writer.flush()
}
fun main(args: Array<String>) {
printTokens(Scanner(ForForeachFFFAutomaton, "fffffff foreach for f".byteInputStream()), System.out)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment