Last active
July 27, 2020 07:40
-
-
Save AlexCouch/3cc69de5a181ddddde91884ca7915f35 to your computer and use it in GitHub Desktop.
A lexer written in Kotlin using coroutine channels (`produce`/`receive`) and Arrow's `Option` type
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import arrow.core.* | |
import kotlinx.coroutines.CoroutineScope | |
import kotlinx.coroutines.ExperimentalCoroutinesApi | |
import kotlinx.coroutines.channels.produce | |
import kotlinx.coroutines.runBlocking | |
/**
 * Safely reads the character of this string at [idx].
 *
 * @param idx candidate index into the receiver (may be out of range).
 * @return `Some(char)` when [idx] is a valid index, `None` otherwise.
 *
 * BUG FIX: the original only guarded `idx >= length`, so a *negative* index
 * threw StringIndexOutOfBoundsException instead of returning `None`.
 * `idx in indices` guards both ends of the range.
 */
fun String.advance(idx: Int): Option<Char> =
    if (idx in indices) this[idx].some() else none()
/**
 * Starts a producer coroutine that matches one lexeme of [string] beginning
 * at [startingIndex] and sends the result on the returned channel.
 *
 * Exactly one value is sent:
 *  - `None` when [startingIndex] is past the end of [string], or when the
 *    character there fails [initPredicate];
 *  - otherwise `Some(lexeme)`, where the lexeme is the run of consecutive
 *    characters (starting at [startingIndex]) satisfying [continuousPredicate].
 *
 * [onMatch] is invoked for every character accepted into the lexeme, just
 * before it is appended — callers use it to advance their own
 * index/line/column counters.
 *
 * NOTE(review): the first character is vetted with [initPredicate], but the
 * accumulation loop re-checks it with [continuousPredicate]. If init passes
 * while continuous fails on that same character, the result is `Some("")` —
 * confirm callers treat an empty lexeme as "no match". (The predicates used
 * in this file make init imply continuous, so it cannot happen here.)
 */
@ExperimentalCoroutinesApi
fun CoroutineScope.matchAndProduce(
    string: String,
    startingIndex: Int,
    initPredicate: (Char) -> Boolean,
    continuousPredicate: (Char) -> Boolean,
    onMatch: (Char) -> Unit = {}
) = produce{
    // buildString is inline, so the suspending `send` calls and the
    // non-local `return@produce` inside its lambda are legal: they execute
    // in the producer coroutine's scope.
    send(buildString {
        val start = string.advance(startingIndex)
        if(start.isEmpty()){
            // Starting index is out of range: report "no match" and stop
            // before the outer send can run.
            send(arrow.core.none())
            return@produce
        }
        if(!initPredicate((start as Some).t)) {
            // First character does not begin this kind of lexeme.
            send(arrow.core.none())
            return@produce
        }
        var idx = startingIndex
        do{
            val n = string.advance(idx)
            when(n){
                is None -> break // ran off the end of the input
                is Some -> when{
                    continuousPredicate(n.t) -> {
                        onMatch(n.t) // caller updates its counters first
                        append(n.t)
                    }
                    else -> break // this character ends the lexeme
                }
            }
            idx++
        // Kotlin scopes do-while body declarations into the condition, so `n`
        // is visible here. The condition re-tests exactly what the body just
        // checked, so in practice the loop only exits via the breaks above.
        }while(n is Some && continuousPredicate(n.t))
    }.some())
}
/**
 * Tokenizes the receiver string into a lazy [Sequence] of [LexerToken]s,
 * wrapped in `Either.Left` (the `Right<String>` side is presumably reserved
 * for an error message — no code path here produces it; confirm with callers).
 *
 * Lexer state:
 *  - `index` — current char offset into the receiver
 *  - `line`  — current 1-based line number
 *  - `col`   — current 1-based column number
 *
 * Each iteration runs the matchers in order (whitespace, integer literal,
 * identifier); matchers advance `index`/`line`/`col` themselves through their
 * `onMatch` hooks.
 *
 * NOTE(review): `runBlocking` inside a suspend function is an anti-pattern,
 * but `SequenceScope` is not a coroutine scope, so the channel `receive()`
 * cannot suspend here without restructuring the producer API; kept as-is.
 */
@ExperimentalCoroutinesApi
suspend fun String.tokenize(): Either<Sequence<LexerToken>, String> =
    sequence {
        var index = 0
        var line = 1
        var col = 1
        while (index < length) {
            // Single end-of-input check (the original tested `isEmpty()` and
            // then matched `None` — the same condition twice).
            when (val char = advance(index)) {
                is None -> break // past the end of input
                is Some -> {
                    // Where this token starts, for the position record.
                    val startLine = line
                    val startCol = col
                    val startIdx = index

                    // Skip (and count) whitespace; newlines reset the column.
                    runBlocking {
                        matchAndProduce(
                            this@tokenize,
                            index,
                            initPredicate = { it.isWhitespace() },
                            continuousPredicate = {
                                it.isWhitespace() || it == '\r' || it == '\n'
                            },
                            { c ->
                                if (c == '\r' || c == '\n') {
                                    line++
                                    col = 1
                                } else {
                                    // BUG FIX: the column was previously not
                                    // advanced for non-newline whitespace, so
                                    // `col` drifted after every space/tab.
                                    col++
                                }
                                index++
                            }
                        ).receive()
                    }

                    // Integer literal: one or more decimal digits.
                    val digit = runBlocking {
                        matchAndProduce(
                            this@tokenize,
                            index,
                            initPredicate = { it.isDigit() },
                            continuousPredicate = { it.isDigit() },
                            onMatch = {
                                index++
                                col++
                            }
                        ).receive()
                    }
                    if (digit.isDefined()) {
                        val lexeme = (digit as Some).t
                        yield(
                            LexerToken.IntegerToken(
                                lexeme,
                                LexerPosition(startLine, startCol, startIdx, line, col, index)
                            )
                        )
                    }

                    // Identifier: a letter followed by letters or digits.
                    val identifier = runBlocking {
                        matchAndProduce(
                            this@tokenize,
                            index,
                            initPredicate = { it.isLetter() },
                            continuousPredicate = { it.isLetterOrDigit() },
                            onMatch = {
                                index++
                                col++
                            }
                        ).receive()
                    }
                    if (identifier.isDefined()) {
                        val lexeme = (identifier as Some).t
                        yield(
                            LexerToken.IdentifierToken(
                                lexeme,
                                LexerPosition(
                                    startLine,
                                    startCol,
                                    startIdx,
                                    line,
                                    col,
                                    index
                                )
                            )
                        )
                    }

                    // BUG FIX: the original unconditionally did `index++; col++`
                    // here, but the matchers had already advanced `index` past
                    // any consumed lexeme — so one character was silently
                    // skipped after every successful match. Only step forward
                    // when nothing matched (i.e. the character is unrecognised).
                    if (index == startIdx) {
                        index++
                        col++
                    }
                }
            }
        }
    }.left()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment