
A Scalariform-based Lexer for RSyntaxTextArea
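
This TokenMaker plugs Scalariform's lexer into RSyntaxTextArea: the editor requests tokens for one text segment at a time, the class lexes the entire document with Scalariform (for correct context across lines), and each token that falls within the segment is converted to the closest RSyntaxTextArea token type.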

ScalariformTokenMaker.scala
/*
 * Copyright (C) 2012 Lalit Pant <pant.lalit@gmail.com>
 *
 * The contents of this file are subject to the GNU General Public License
 * Version 3 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of
 * the License at http://www.gnu.org/copyleft/gpl.html
 *
 * Software distributed under the License is distributed on an "AS
 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * rights and limitations under the License.
 *
 */
 
package net.kogics.kojo.lexer
 
import org.fife.ui.rsyntaxtextarea.AbstractTokenMaker
import org.fife.ui.rsyntaxtextarea.Token
import org.fife.ui.rsyntaxtextarea.TokenMap
import org.fife.ui.rsyntaxtextarea.TokenTypes
 
import javax.swing.text.Segment
import scalariform.lexer.ScalaLexer
import scalariform.lexer.{ Token => SfToken }
import scalariform.lexer.TokenType
import scalariform.lexer.Tokens
 
class ScalariformTokenMaker extends AbstractTokenMaker {

  override def getCurlyBracesDenoteCodeBlocks = true

  override def getMarkOccurrencesOfTokenType(tpe: Int) =
    tpe == TokenTypes.IDENTIFIER || tpe == TokenTypes.FUNCTION

  // Ask the editor to indent the next line after an opening brace or paren.
  override def getShouldIndentNextLineAfter(t: Token) = {
    if (t != null && t.textCount == 1) {
      val ch = t.text(t.textOffset)
      ch == '{' || ch == '('
    }
    else {
      false
    }
  }

  // Per-segment lexing is deliberately unsupported: Scalariform needs the full
  // document for context, so the full-document variant (getTokenList2) below
  // is used instead.
  override def getTokenList(pSegment: Segment, pInitialTokenType: Int, pSegmentOffset: Int): Token = {
    throw new UnsupportedOperationException
  }
 
  override def getLastTokenTypeOnLine(text: Segment, initialTokenType: Int) = {
    TokenTypes.NULL
  }

  // Funnels all token creation through a single overridable method.
  def addTokenHook(array: Array[Char], start: Int, end: Int, tokenType: Int, startOffset: Int) {
    addToken(array, start, end, tokenType, startOffset)
  }
 
  // Full-document lexing entry point. getTokenList2 is not part of stock
  // RSyntaxTextArea; it appears to come from a modified build that also passes
  // in the complete document text.
  override def getTokenList2(segment: Segment, initialTokenType: Int, segmentOffset: Int, doc: String): Token = {

    // Does the last token before the segment extend into it (a multiline
    // string, or whitespace that spans a line break)?
    def isLastMultiline(ts: List[SfToken]) = ts match {
      case Nil => false
      case ts2 =>
        val t = ts2.last
        t.tokenType match {
          case Tokens.WS =>
            t.rawText(0) == '\n' && t.rawText.size > 1
          case Tokens.STRING_LITERAL =>
            t.rawText.startsWith("\"\"\"") && t.rawText.contains("\n")
          case _ =>
            t.rawText.contains("\n")
        }
    }

    // Trim a token that straddles the segment boundary down to the part that
    // lies inside the segment.
    def splitLast(t: SfToken) = {
      val delta = segmentOffset - t.offset
      SfToken(t.tokenType, t.rawText.slice(delta, t.rawText.length), t.offset + delta, t.rawText.slice(delta, t.rawText.length))
    }

    // Convert a Scalariform token to an RSyntaxTextArea token, translating
    // document offsets into offsets within the segment's backing array.
    def addRstaToken(t: SfToken) {
      val tRSTATokenStart = t.offset + segment.offset - segmentOffset
      val tRSTATokenEnd = tRSTATokenStart + t.length - 1
      val tRSTATokenOffset = t.offset
      addTokenHook(segment.array, tRSTATokenStart, tRSTATokenEnd, convertTokenType(t.tokenType), tRSTATokenOffset)
    }

    // Lex the whole document (forgiving errors), then split the tokens into
    // those at or after the segment and those before it.
    val tokens = ScalaLexer.rawTokenise(doc, true)
    val (active, inactive) = tokens.partition { t => t.offset >= segmentOffset }

    resetTokenList()
    if (isLastMultiline(inactive)) {
      val t2 = splitLast(inactive.last)
      addRstaToken(t2)
    }

    // Drop the last raw token, the EOF marker appended by rawTokenise.
    val active2 = if (active.size > 1) active.take(active.size - 1) else active
    active2.foreach { addRstaToken }
    firstToken
  }
 
  override def getWordsToHighlight: TokenMap = wordsToHighlight
 
  // Map Scalariform token types onto RSyntaxTextArea token types.
  def convertTokenType(sfType: TokenType): Int = {
    if (Tokens.KEYWORDS.contains(sfType)) {
      TokenTypes.RESERVED_WORD
    }
    else if (Tokens.COMMENTS.contains(sfType)) {
      TokenTypes.COMMENT_MULTILINE
    }
    else {
      sfType match {
        case Tokens.WS => TokenTypes.WHITESPACE
        case Tokens.CHARACTER_LITERAL => TokenTypes.LITERAL_CHAR
        case Tokens.INTEGER_LITERAL => TokenTypes.LITERAL_NUMBER_DECIMAL_INT
        case Tokens.FLOATING_POINT_LITERAL => TokenTypes.LITERAL_NUMBER_FLOAT
        case Tokens.STRING_LITERAL => TokenTypes.LITERAL_STRING_DOUBLE_QUOTE
        case Tokens.STRING_PART => TokenTypes.LITERAL_CHAR
        case Tokens.SYMBOL_LITERAL => TokenTypes.LITERAL_STRING_DOUBLE_QUOTE
        case Tokens.TRUE => TokenTypes.LITERAL_BOOLEAN
        case Tokens.FALSE => TokenTypes.LITERAL_BOOLEAN
        case Tokens.NULL => TokenTypes.LITERAL_CHAR
        case Tokens.EOF => TokenTypes.WHITESPACE
        case Tokens.LBRACE => TokenTypes.SEPARATOR
        case Tokens.RBRACE => TokenTypes.SEPARATOR
        case Tokens.LBRACKET => TokenTypes.SEPARATOR
        case Tokens.RBRACKET => TokenTypes.SEPARATOR
        case Tokens.LPAREN => TokenTypes.SEPARATOR
        case Tokens.RPAREN => TokenTypes.SEPARATOR

        case Tokens.XML_START_OPEN => TokenTypes.MARKUP_TAG_DELIMITER
        case Tokens.XML_EMPTY_CLOSE => TokenTypes.MARKUP_TAG_DELIMITER
        case Tokens.XML_TAG_CLOSE => TokenTypes.MARKUP_TAG_DELIMITER
        case Tokens.XML_END_OPEN => TokenTypes.MARKUP_TAG_DELIMITER
        case Tokens.XML_WHITESPACE => TokenTypes.WHITESPACE
        case Tokens.XML_ATTR_EQ => TokenTypes.MARKUP_TAG_ATTRIBUTE
        case Tokens.XML_ATTR_VALUE => TokenTypes.MARKUP_TAG_ATTRIBUTE_VALUE
        case Tokens.XML_NAME => TokenTypes.MARKUP_TAG_NAME
        case Tokens.XML_PCDATA => TokenTypes.IDENTIFIER
        case Tokens.XML_COMMENT => TokenTypes.COMMENT_MARKUP
        case Tokens.XML_CDATA => TokenTypes.MARKUP_CDATA
        case Tokens.XML_UNPARSED => TokenTypes.MARKUP_CDATA
        case Tokens.XML_PROCESSING_INSTRUCTION => TokenTypes.MARKUP_PROCESSING_INSTRUCTION

        case _ => TokenTypes.IDENTIFIER
      }
    }
  }
}
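
For reference, here is a minimal sketch of how a custom TokenMaker is typically registered with RSyntaxTextArea. The "text/scala" key and the SyntaxDemo object are illustrative, not part of the gist; and since this class throws from getTokenList, it needs a build of the library that routes rendering through getTokenList2, so against stock RSyntaxTextArea this registration would fail once the editor asks for tokens.

import javax.swing.{ JFrame, SwingUtilities }

import org.fife.ui.rsyntaxtextarea.AbstractTokenMakerFactory
import org.fife.ui.rsyntaxtextarea.RSyntaxTextArea
import org.fife.ui.rsyntaxtextarea.TokenMakerFactory
import org.fife.ui.rtextarea.RTextScrollPane

object SyntaxDemo {
  def main(args: Array[String]) {
    SwingUtilities.invokeLater(new Runnable {
      def run() {
        // Map a (made-up) MIME-style key to the token maker's class name.
        val factory = TokenMakerFactory.getDefaultInstance().asInstanceOf[AbstractTokenMakerFactory]
        factory.putMapping("text/scala", "net.kogics.kojo.lexer.ScalariformTokenMaker")

        val textArea = new RSyntaxTextArea(25, 80)
        textArea.setSyntaxEditingStyle("text/scala")

        val frame = new JFrame("Scalariform highlighting")
        frame.add(new RTextScrollPane(textArea))
        frame.pack()
        frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE)
        frame.setVisible(true)
      }
    })
  }
}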
