Skip to content

Instantly share code, notes, and snippets.

@IARI
Last active August 17, 2018 16:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save IARI/91011233658d386f1f1aefd2450537f2 to your computer and use it in GitHub Desktop.
Save IARI/91011233658d386f1f1aefd2450537f2 to your computer and use it in GitHub Desktop.
package de.imc.placeholder.app
import com.github.h0tk3y.betterParse.lexer.*
import com.github.h0tk3y.betterParse.utils.cached
import java.io.InputStream
import java.util.*
import java.util.logging.Logger
import java.util.regex.MatchResult
import kotlin.coroutines.experimental.buildSequence
/**
 * Tokenizes input character sequences using the [tokens], prioritized by their order in the list,
 * first matched first.
 *
 * A token that only carries a string pattern is compiled with the given [options]; a token that
 * already carries a [Regex] keeps the options of that regex. In both cases the options are carried
 * over into the combined pattern that is used for the actual matching.
 *
 * @param tokens the tokens to match, in priority order; must not be empty.
 * @param options regex options applied to tokens that are defined by a plain pattern string.
 * @throws IllegalArgumentException if [tokens] is empty.
 */
class MyTokenizer(override val tokens: List<Token>, vararg options: RegexOption) : Tokenizer {
    init {
        require(tokens.isNotEmpty()) { "The tokens list should not be empty" }
    }

    val logger: Logger = Logger.getLogger(javaClass.simpleName)

    /** Every token paired with the compiled pattern that recognizes it. */
    val patterns = tokens.map { it to (it.regex?.toPattern() ?: it.pattern.toRegex(setOf(*options)).toPattern()) }

    // One alternation over all token patterns, each in its own capturing group. "\\G" anchors a match
    // to the position where the previous match ended. The per-token flags are re-attached inline,
    // because Pattern.pattern() returns only the source text and would otherwise silently drop
    // options such as DOT_MATCHES_ALL or MULTILINE.
    private val allInOnePattern = patterns
        .joinToString("|", prefix = "\\G(?:", postfix = ")") { (_, pattern) -> "(${sourceWithInlineFlags(pattern)})" }
        .toPattern()

    // Index of the outer capturing group of each token inside allInOnePattern, in token order.
    private val patternGroupIndices =
        buildSequence {
            var groupId = 1 // the zero group is the whole match
            for (p in patterns) {
                yield(groupId) // the group for the current pattern
                groupId += p.second.matcher("").groupCount() + 1 // skip all the nested groups in p
            }
        }.toList()

    /** Tokenizes the [input] from a [String] into a sequence of [TokenMatch]. */
    override fun tokenize(input: String): Sequence<TokenMatch> = input.scanTokenize(::Scanner)

    /** Tokenizes the [input] from an [InputStream] into a sequence of [TokenMatch]. */
    override fun tokenize(input: InputStream): Sequence<TokenMatch> = input.scanTokenize(::Scanner)

    /** Tokenizes the [input] from a [Readable] into a sequence of [TokenMatch]. */
    override fun tokenize(input: Readable): Sequence<TokenMatch> = input.scanTokenize(::Scanner)

    /**
     * Wraps the receiver in a [Scanner] created by [const], logs the scanner's delimiter pattern
     * and tokenizes the scanner.
     *
     * @param const factory that turns the receiver into a [Scanner].
     * @return the token matches produced by [tokenize] for that scanner.
     */
    fun <T> T.scanTokenize(const: (T) -> Scanner): Sequence<TokenMatch> {
        val scanner = const(this)
        //useDelimiter("""(?=\n)""")
        logger.info("Delimiter:")
        logger.info(scanner.delimiter().pattern())
        return tokenize(scanner)
    }

    /**
     * Tokenizes the [input] from a [Scanner] into a sequence of [TokenMatch].
     *
     * Matching stops at the first position where no token matches: the next scanner token is then
     * emitted as a single `noneMatched` match and the sequence ends. Positions are tracked as a
     * 0-based character offset plus 1-based row and column.
     */
    override fun tokenize(input: Scanner): Sequence<TokenMatch> = buildSequence<TokenMatch> {
        var pos = 0
        var row = 1
        var col = 1
        while (input.hasNext()) {
            if (input.findWithinHorizon(allInOnePattern, 0) == null) {
                logger.warning(allInOnePattern.pattern())
                logger.warning("could not match")
                logger.warning(input.toString())
                // Nothing may read from the scanner between the failed match and next():
                // any successful find would consume part of the text that is reported here.
                yield(TokenMatch(noneMatched, input.next(), pos, row, col))
                break
            }
            val matchResult: MatchResult = input.match()
            // The first token whose outer group took part in the match wins.
            val matchedToken: Token = tokens[patternGroupIndices.indexOfFirst { matchResult.group(it) != null }]
            val match = matchResult.group()
            logger.info("matches: '$match'")
            val result = TokenMatch(matchedToken, match, pos, row, col)
            pos += match.length
            val lastNewline = match.lastIndexOf('\n')
            if (lastNewline < 0) {
                col += match.length
            } else {
                row += match.count { it == '\n' }
                // The column restarts after the last line break contained in the match.
                col = match.length - lastNewline
            }
            yield(result)
        }
    }.cached()

    private companion object {
        /** Pattern flags that have an embedded-flag spelling, paired with their flag letter. */
        val INLINE_FLAGS: List<Pair<Int, Char>> = listOf(
            java.util.regex.Pattern.UNIX_LINES to 'd',
            java.util.regex.Pattern.CASE_INSENSITIVE to 'i',
            java.util.regex.Pattern.MULTILINE to 'm',
            java.util.regex.Pattern.DOTALL to 's',
            java.util.regex.Pattern.UNICODE_CASE to 'u',
            java.util.regex.Pattern.COMMENTS to 'x',
            java.util.regex.Pattern.UNICODE_CHARACTER_CLASS to 'U'
        )

        /**
         * Returns the source of [pattern] with its flags attached as a non-capturing inline flag
         * group, so that the flags survive embedding into a larger expression and the number of
         * capturing groups stays the same.
         *
         * LITERAL patterns are quoted instead. CANON_EQ has no inline form and is not carried over.
         */
        fun sourceWithInlineFlags(pattern: java.util.regex.Pattern): String {
            val flags = pattern.flags()
            val literal = (flags and java.util.regex.Pattern.LITERAL) != 0
            val source = if (literal) java.util.regex.Pattern.quote(pattern.pattern()) else pattern.pattern()
            val letters = INLINE_FLAGS
                .filter { (flag, _) -> (flags and flag) != 0 }
                .map { it.second }
                // only the case-related flags still matter for a literal pattern
                .filter { !literal || it == 'i' || it == 'u' }
                .joinToString("")
            if (letters.isEmpty()) return source
            // In comments mode a trailing "# ..." would otherwise swallow the closing parenthesis.
            val terminator = if ('x' in letters) "\n" else ""
            return "(?$letters:$source$terminator)"
        }
    }
}
package de.imc.placeholder.app.parsing
import com.github.h0tk3y.betterParse.combinators.map
import com.github.h0tk3y.betterParse.combinators.zeroOrMore
import com.github.h0tk3y.betterParse.grammar.Grammar
import com.github.h0tk3y.betterParse.parser.Parser
import de.imc.placeholder.app.MyTokenizer
/**
 * Minimal grammar used to exercise [MyTokenizer]: it has a single token and joins the
 * string form of all token matches with `#`.
 */
object testgrammar : Grammar<String>() {
    /** The only token of the grammar: one or more characters, where `.` also matches line breaks. */
    val singleToken by token(Regex(""".+""", RegexOption.DOT_MATCHES_ALL))

    /** Accepts any number of [singleToken] matches and renders them as one `#`-separated string. */
    override val rootParser: Parser<String> by zeroOrMore(singleToken).map { matches ->
        matches.joinToString(separator = "#")
    }

    /** Lazily created [MyTokenizer] over this grammar's tokens, replacing the default tokenizer. */
    override val tokenizer by lazy {
        MyTokenizer(
            tokens,
            RegexOption.MULTILINE,
            RegexOption.DOT_MATCHES_ALL
        )
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment