Last active
July 27, 2020 07:40
-
-
Save AlexCouch/3cc69de5a181ddddde91884ca7915f35 to your computer and use it in GitHub Desktop.
A lexer written in Kotlin using coroutine channels (`produce`/`receive`) and Arrow's `Option` type
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import arrow.core.* | |
import kotlinx.coroutines.CoroutineScope | |
import kotlinx.coroutines.ExperimentalCoroutinesApi | |
import kotlinx.coroutines.channels.produce | |
import kotlinx.coroutines.runBlocking | |
/**
 * Safely reads the character of this string at [idx].
 *
 * @param idx candidate index into the receiver (may be out of range).
 * @return `Some(char)` when [idx] is a valid index, `None` otherwise.
 *
 * BUG FIX: the original only guarded `idx >= length`, so a *negative* index
 * threw StringIndexOutOfBoundsException instead of returning `None`.
 * `idx in indices` guards both ends of the range.
 */
fun String.advance(idx: Int): Option<Char> =
    if (idx in indices) this[idx].some() else none()
/**
 * Starts a producer coroutine that matches one lexeme of [string] beginning
 * at [startingIndex] and sends the result on the returned channel.
 *
 * Exactly one value is sent:
 *  - `None` when [startingIndex] is past the end of [string], or when the
 *    character there fails [initPredicate];
 *  - otherwise `Some(lexeme)`, where the lexeme is the run of consecutive
 *    characters (starting at [startingIndex]) satisfying [continuousPredicate].
 *
 * [onMatch] is invoked for every character accepted into the lexeme, just
 * before it is appended — callers use it to advance their own
 * index/line/column counters.
 *
 * NOTE(review): the first character is vetted with [initPredicate], but the
 * accumulation loop re-checks it with [continuousPredicate]. If init passes
 * while continuous fails on that same character, the result is `Some("")` —
 * confirm callers treat an empty lexeme as "no match". (The predicates used
 * in this file make init imply continuous, so it cannot happen here.)
 */
@ExperimentalCoroutinesApi
fun CoroutineScope.matchAndProduce(
    string: String,
    startingIndex: Int,
    initPredicate: (Char) -> Boolean,
    continuousPredicate: (Char) -> Boolean,
    onMatch: (Char) -> Unit = {}
) = produce{
    // buildString is inline, so the suspending `send` calls and the
    // non-local `return@produce` inside its lambda are legal: they execute
    // in the producer coroutine's scope.
    send(buildString {
        val start = string.advance(startingIndex)
        if(start.isEmpty()){
            // Starting index is out of range: report "no match" and stop
            // before the outer send can run.
            send(arrow.core.none())
            return@produce
        }
        if(!initPredicate((start as Some).t)) {
            // First character does not begin this kind of lexeme.
            send(arrow.core.none())
            return@produce
        }
        var idx = startingIndex
        do{
            val n = string.advance(idx)
            when(n){
                is None -> break // ran off the end of the input
                is Some -> when{
                    continuousPredicate(n.t) -> {
                        onMatch(n.t) // caller updates its counters first
                        append(n.t)
                    }
                    else -> break // this character ends the lexeme
                }
            }
            idx++
        // Kotlin scopes do-while body declarations into the condition, so `n`
        // is visible here. The condition re-tests exactly what the body just
        // checked, so in practice the loop only exits via the breaks above.
        }while(n is Some && continuousPredicate(n.t))
    }.some())
}
/**
 * Tokenizes the receiver string into a lazy [Sequence] of [LexerToken]s,
 * wrapped in `Either.Left` (the `Right<String>` side is presumably reserved
 * for an error message — no code path here produces it; confirm with callers).
 *
 * Lexer state:
 *  - `index` — current char offset into the receiver
 *  - `line`  — current 1-based line number
 *  - `col`   — current 1-based column number
 *
 * Each iteration runs the matchers in order (whitespace, integer literal,
 * identifier); matchers advance `index`/`line`/`col` themselves through their
 * `onMatch` hooks.
 *
 * NOTE(review): `runBlocking` inside a suspend function is an anti-pattern,
 * but `SequenceScope` is not a coroutine scope, so the channel `receive()`
 * cannot suspend here without restructuring the producer API; kept as-is.
 */
@ExperimentalCoroutinesApi
suspend fun String.tokenize(): Either<Sequence<LexerToken>, String> =
    sequence {
        var index = 0
        var line = 1
        var col = 1
        while (index < length) {
            // Single end-of-input check (the original tested `isEmpty()` and
            // then matched `None` — the same condition twice).
            when (val char = advance(index)) {
                is None -> break // past the end of input
                is Some -> {
                    // Where this token starts, for the position record.
                    val startLine = line
                    val startCol = col
                    val startIdx = index

                    // Skip (and count) whitespace; newlines reset the column.
                    runBlocking {
                        matchAndProduce(
                            this@tokenize,
                            index,
                            initPredicate = { it.isWhitespace() },
                            continuousPredicate = {
                                it.isWhitespace() || it == '\r' || it == '\n'
                            },
                            { c ->
                                if (c == '\r' || c == '\n') {
                                    line++
                                    col = 1
                                } else {
                                    // BUG FIX: the column was previously not
                                    // advanced for non-newline whitespace, so
                                    // `col` drifted after every space/tab.
                                    col++
                                }
                                index++
                            }
                        ).receive()
                    }

                    // Integer literal: one or more decimal digits.
                    val digit = runBlocking {
                        matchAndProduce(
                            this@tokenize,
                            index,
                            initPredicate = { it.isDigit() },
                            continuousPredicate = { it.isDigit() },
                            onMatch = {
                                index++
                                col++
                            }
                        ).receive()
                    }
                    if (digit.isDefined()) {
                        val lexeme = (digit as Some).t
                        yield(
                            LexerToken.IntegerToken(
                                lexeme,
                                LexerPosition(startLine, startCol, startIdx, line, col, index)
                            )
                        )
                    }

                    // Identifier: a letter followed by letters or digits.
                    val identifier = runBlocking {
                        matchAndProduce(
                            this@tokenize,
                            index,
                            initPredicate = { it.isLetter() },
                            continuousPredicate = { it.isLetterOrDigit() },
                            onMatch = {
                                index++
                                col++
                            }
                        ).receive()
                    }
                    if (identifier.isDefined()) {
                        val lexeme = (identifier as Some).t
                        yield(
                            LexerToken.IdentifierToken(
                                lexeme,
                                LexerPosition(
                                    startLine,
                                    startCol,
                                    startIdx,
                                    line,
                                    col,
                                    index
                                )
                            )
                        )
                    }

                    // BUG FIX: the original unconditionally did `index++; col++`
                    // here, but the matchers had already advanced `index` past
                    // any consumed lexeme — so one character was silently
                    // skipped after every successful match. Only step forward
                    // when nothing matched (i.e. the character is unrecognised).
                    if (index == startIdx) {
                        index++
                        col++
                    }
                }
            }
        }
    }.left()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment