@AlexCouch
Last active July 27, 2020 07:40
A lexer written with coroutines
import arrow.core.*
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.ExperimentalCoroutinesApi
import kotlinx.coroutines.channels.produce
import kotlinx.coroutines.runBlocking

/**
 * Returns the character at [idx], or [None] when [idx] is past the end of the string.
 */
fun String.advance(idx: Int): Option<Char> =
    if (idx >= length) {
        none()
    } else {
        this[idx].some()
    }

/**
 * Produces a single Option<String> on a channel:
 *  - Some(lexeme) when the character at [startingIndex] satisfies [initPredicate], where the
 *    lexeme is the longest run of characters satisfying [continuousPredicate];
 *  - None when the input is exhausted or [initPredicate] fails.
 * [onMatch] is invoked for every character consumed into the lexeme.
 */
@ExperimentalCoroutinesApi
fun CoroutineScope.matchAndProduce(
    string: String,
    startingIndex: Int,
    initPredicate: (Char) -> Boolean,
    continuousPredicate: (Char) -> Boolean,
    onMatch: (Char) -> Unit = {}
) = produce {
    send(buildString {
        // Bail out with None if we are past the end of the input
        val start = string.advance(startingIndex)
        if (start.isEmpty()) {
            send(none())
            return@produce
        }
        // Bail out with None if the first character doesn't start this kind of token
        if (!initPredicate((start as Some).t)) {
            send(none())
            return@produce
        }
        // Consume characters while they satisfy the continuation predicate
        var idx = startingIndex
        do {
            val n = string.advance(idx)
            when (n) {
                is None -> break
                is Some -> when {
                    continuousPredicate(n.t) -> {
                        onMatch(n.t)
                        append(n.t)
                    }
                    else -> break
                }
            }
            idx++
        } while (n is Some && continuousPredicate(n.t))
    }.some())
}
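
// --- Example (not part of the original gist) -----------------------------------------------
// A minimal sketch showing how matchAndProduce can be driven on its own, assuming the rest of
// this file: the producer sends exactly one Option<String>, so a single receive() suffices.
// The function name digitRunExample and the input "123abc" are made up for illustration.
@ExperimentalCoroutinesApi
fun digitRunExample() = runBlocking {
    val lexeme = matchAndProduce(
        "123abc",
        startingIndex = 0,
        initPredicate = { it.isDigit() },
        continuousPredicate = { it.isDigit() }
    ).receive()
    println(lexeme) // the matched run "123", wrapped in Some
}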

/**
 * Lazily tokenizes this string into a sequence of [LexerToken]s, wrapped on the left side of
 * an [Either]; the right side ([String]) is reserved for an error message.
 */
@ExperimentalCoroutinesApi
suspend fun String.tokenize(): Either<Sequence<LexerToken>, String> =
    sequence {
        // Initialize the properties of the lexer:
        //  index is the current char index
        //  line is the current line in the string
        //  col is the current column of the string
        // Together these are used to produce a token every iteration.
        // If no token could be produced, we raise an error (the String on the Either's right side).
        var index = 0
        var line = 1
        var col = 1
        mainLoop@ while (index < length) {
            val char = advance(index)
            when (char) {
                is None -> break
                is Some -> {
                    // Remember where this token starts so its position can be reported
                    val startLine = line
                    val startCol = col
                    val startIdx = index
                    // Skip whitespace, tracking line/column numbers as we go
                    runBlocking {
                        val whitespace = matchAndProduce(
                            this@tokenize,
                            index,
                            initPredicate = { it.isWhitespace() },
                            continuousPredicate = {
                                it.isWhitespace() || it == '\r' || it == '\n'
                            },
                            onMatch = { c ->
                                if (c == '\r' || c == '\n') {
                                    line++
                                    col = 1
                                }
                                index++
                            }
                        )
                        whitespace.receive()
                    }
                    // Match a run of digits into an integer token
                    val digit = runBlocking {
                        matchAndProduce(
                            this@tokenize,
                            index,
                            initPredicate = { it.isDigit() },
                            continuousPredicate = { it.isDigit() },
                            onMatch = {
                                index++
                                col++
                            }
                        ).receive()
                    }
                    if (digit.isDefined()) {
                        val lexeme = (digit as Some).t
                        yield(
                            LexerToken.IntegerToken(
                                lexeme,
                                LexerPosition(startLine, startCol, startIdx, line, col, index)
                            )
                        )
                    }
                    // Match a letter followed by letters/digits into an identifier token
                    val identifier = runBlocking {
                        matchAndProduce(
                            this@tokenize,
                            index,
                            initPredicate = { it.isLetter() },
                            continuousPredicate = { it.isLetterOrDigit() },
                            onMatch = {
                                index++
                                col++
                            }
                        ).receive()
                    }
                    if (identifier.isDefined()) {
                        val lexeme = (identifier as Some).t
                        yield(
                            LexerToken.IdentifierToken(
                                lexeme,
                                LexerPosition(startLine, startCol, startIdx, line, col, index)
                            )
                        )
                    }
                }
            }
            // Advance past the current character (also prevents an infinite loop on unmatched input)
            index++
            col++
        }
    }.left()
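
// --- Assumed supporting definitions (not shown in the gist) ---------------------------------
// tokenize() refers to LexerToken and LexerPosition, which are presumably defined in another
// file. The sketch below is only an assumption reconstructed from how they are used above;
// the field names are guesses.
data class LexerPosition(
    val startLine: Int,
    val startCol: Int,
    val startIdx: Int,
    val endLine: Int,
    val endCol: Int,
    val endIdx: Int
)

sealed class LexerToken {
    abstract val lexeme: String
    abstract val position: LexerPosition

    data class IntegerToken(override val lexeme: String, override val position: LexerPosition) : LexerToken()
    data class IdentifierToken(override val lexeme: String, override val position: LexerPosition) : LexerToken()
}

// A hypothetical driver, assuming the definitions above: tokenize() puts the lazy token
// sequence on the Either's left side, so fold's first branch receives the tokens.
@ExperimentalCoroutinesApi
fun main() {
    runBlocking {
        "foo 42 bar123".tokenize().fold(
            { tokens -> tokens.forEach(::println) },
            { error -> println("Lexer error: $error") }
        )
    }
}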