Skip to content

Instantly share code, notes, and snippets.

@tangentstorm
Created June 24, 2016 03:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tangentstorm/61b8a539650a487b78e37b124dd29dcf to your computer and use it in GitHub Desktop.
Save tangentstorm/61b8a539650a487b78e37b124dd29dcf to your computer and use it in GitHub Desktop.
lexer and parser for K language... part of IntelliK
// Generator attributes for this grammar: where the generated parser, PSI
// interfaces/implementations and element-type classes live and how they are named.
// (The attribute names look like Grammar-Kit's header attributes — TODO confirm the generator.)
{
// Fully-qualified name of the parser class.
parserClass="com.x1010data.intellik.parser.KParser"
// Base class named for PSI elements.
extends="com.intellij.extapi.psi.ASTWrapperPsiElement"
// PSI class names get the "K" prefix; implementation classes get the "Impl" suffix.
psiClassPrefix="K"
psiImplClassSuffix="Impl"
// Packages for the PSI interfaces and their implementations.
psiPackage="com.x1010data.intellik.psi"
psiImplPackage="com.x1010data.intellik.psi.impl"
// Holder of the element/token type constants; the lexer spec refers to it as KTypes.
elementTypeHolderClass="com.x1010data.intellik.psi.KTypes"
elementTypeClass="com.x1010data.intellik.psi.KElementType"
tokenTypeClass="com.x1010data.intellik.psi.KTokenType"
// NOTE(review): "KPsiImpUtil" (not "KPsiImplUtil") — confirm this matches the actual class name.
psiImplUtilClass="com.x1010data.intellik.psi.impl.KPsiImpUtil"
}
// ---- Top level -------------------------------------------------------------
// A file is a sequence of items; every item ends in (or is) a NEWLINE.
file ::= item*
// NOTE(review): every part of `line` before its NEWLINE is optional, so `line`
// already accepts a bare NEWLINE; the third alternative here looks redundant — confirm.
item ::= (line | cmd | NEWLINE)
// A COMMAND token, optionally followed by a comment, then end of line.
cmd ::= COMMAND COMMENT? NEWLINE
// Optional statements, optional trailing comment, then end of line.
line ::= stmts? COMMENT? NEWLINE
// One or more statements separated by SEMI.
stmts ::= stmt (SEMI stmt)*
// Body used inside braces, brackets and parentheses (see lambda, list, index,
// amend, cond and the control statements below).
block ::= item* stmts? // so we can have stuff on the last line, before the end brace/bracket/whatever
// ---- Statements ------------------------------------------------------------
// Alternatives are listed in this order: control statements, definition, expression, empty.
stmt ::= ifStmt | doStmt | whileStmt | define | expr | // last is empty statement
// Definition: an lvalue, COLON, then the expression on the right.
define ::= lvalue COLON expr
// ---- Expressions -----------------------------------------------------------
// An expression is a non-empty run of amends, conds, values, operator runs,
// parenthesised expressions and ADV tokens.
expr ::= (amend | cond | value | op+ | LPAREN expr RPAREN | ADV)+
// An operator token (or IDENT) followed by an optional tail.
op ::= (PRIM | BUILTIN | DOT | COLON | MINUS | IDENT) tail
// A value is either an lvalue, or a lambda/noun followed by a tail.
value ::= lvalue | ((lambda | noun) tail)
// An identifier followed by an optional tail.
lvalue ::= IDENT tail
// Braced body with an optional bracketed signature in front.
lambda ::= LBRACE sig? block RBRACE
// Signature: bracketed, possibly empty, SEMI-separated list of identifiers.
sig ::= LBRACK (IDENT (SEMI IDENT)*)? RBRACK
// ---- Nouns (literals and containers) ---------------------------------------
// A string, a run of numbers, a run of SYM tokens, a list, or a dict.
noun ::= STRING | (num+) | (SYM+) | list | dict
// The lexer emits MINUS separately, so the optional sign is attached here.
num ::= MINUS? (INT | RAT)
// Parenthesised block.
list ::= LPAREN block RPAREN
// DOT followed by a parenthesised sequence of entries.
dict ::= DOT LPAREN entry* RPAREN
// Parenthesised entry: two SEMI-separated expressions, then an optional third
// SEMI whose following expression may itself be omitted.
entry ::= LPAREN expr SEMI expr (SEMI expr?)? RPAREN
// ---- Postfix tails ---------------------------------------------------------
// Zero or more attribute accesses, bracketed indexes or ADV tokens.
tail ::= (attr | index | ADV)*
attr ::= DOT IDENT
index ::= LBRACK block RBRACK
// ---- Bracketed forms introduced by a leading token -------------------------
// DOT + bracketed block.
amend ::= DOT LBRACK block RBRACK
// COLON + bracketed block.
cond ::= COLON LBRACK block RBRACK
// Control statements: keyword token followed by a bracketed block.
ifStmt ::= IF LBRACK block RBRACK
doStmt ::= DO LBRACK block RBRACK
whileStmt ::= WHILE LBRACK block RBRACK
package com.x1010data.intellik;
import com.intellij.lexer.FlexLexer;
import com.intellij.psi.tree.IElementType;
import com.intellij.psi.TokenType;
import com.x1010data.intellik.psi.KTypes;
%%
// Generated scanner class: KLexer, implementing IntelliJ's FlexLexer interface.
%class KLexer
%implements FlexLexer
%unicode
// The scanning method is named advance() and returns an IElementType per token.
%function advance
%type IElementType
// Code run at end of input: just return.
%eof{ return;
%eof}
// ---- Character classes and helpers -----------------------------------------
Digit = [0-9]
Alpha = [a-zA-Z]
Nat = {Digit}+
// Horizontal whitespace only (space, tab, form feed); line ends are NEWLINE.
WHITESPACE = [\ \t\f]
NEWLINE = \n|\r|\r\n
// Both run from their introducer ("/" or "\") to the end of the line.
COMMENT = "/" [^\r\n]*
COMMAND = "\\"[^\r\n]*
// ---- Single-character punctuation ------------------------------------------
SEMI = ";"
COLON = ":"
DOT = "."
LPAREN = "("
RPAREN = ")"
LBRACE = "{"
RBRACE = "}"
LBRACK = "["
RBRACK = "]"
MINUS = "-"
// ---- Operators -------------------------------------------------------------
// Either one symbol character with an optional ":" suffix, or a single digit followed by ":".
PRIM = ([~!@#$%\^&*+<>|,?=_] ":"?) | ({Digit} ":") // [;:.\-] are also primitives, but handled specially
// Bare "/" and "\" are not listed here; the rules section handles them per lexical state.
ADV = "'" | "':" | "/:" | "\\:"
// ---- Numbers ---------------------------------------------------------------
INT = {Nat} // handle negatives in the parser
// Digits, a dot, digits; digits are required on both sides of the dot.
RAT = {INT}\.{Nat}
// ---- Named builtins: "_" followed by one of the listed names ---------------
BUILTIN = "_" ("bin"|"di"|"dv"|"dvl"|"draw"|"gtime"|"ic"|"ci"|"jd"|"dj"|"lsq"|"dot"|"mul"|"inv"|"in"|"lin"
|"sv"|"sm"|"ss"|"ssr"|"vs"|"abs"|"floor"|"sin"|"cos"|"tan"|"sinh"|"cosh"|"tanh"
|"exp"|"log"|"sqr"|"sqrt")
// ---- Keywords and identifiers ----------------------------------------------
IF = "if"
DO = "do"
WHILE = "while"
// A letter followed by letters, digits or underscores.
IDENT = {Alpha} ({Alpha}|{Digit}|"_")*
// ---- Strings and symbols ---------------------------------------------------
// STRING = "\"" ([\\].|[^\"])* "\""
// A backslash followed by any character other than a line break.
ESCAPE=\\[^\r\n]
// Opening quote, then ordinary characters, escapes or backslash + line break;
// the closing quote (or a trailing backslash) is optional, so an unterminated
// string can still match STRING.
STRING=\"([^\\\"\r\n]|{ESCAPE}|(\\[\r\n]))*?(\"|\\)?
// A backtick optionally followed by an identifier or a string.
SYM = "`" ({IDENT} | {STRING})?
// INLINE means that the / character behaves as an adverb (unlike YYINITIAL, where it starts a comment)
%state INLINE
// NOTE(review): no rule in the rules section below selects or enters this state — confirm whether it is still needed.
%state STRING
%%
// Rules without a <STATE> prefix are active in every state declared with %state.
// Tie-breaking: the longest match wins; on equal length the earlier rule wins.

// Punctuation and operators: emit the token and switch to INLINE, so that a
// "/" or "\" coming directly after them is lexed as an adverb.
{LPAREN} { yybegin(INLINE); return KTypes.LPAREN; }
{RPAREN} { yybegin(INLINE); return KTypes.RPAREN; }
{LBRACE} { yybegin(INLINE); return KTypes.LBRACE; }
{RBRACE} { yybegin(INLINE); return KTypes.RBRACE; }
{LBRACK} { yybegin(INLINE); return KTypes.LBRACK; }
{RBRACK} { yybegin(INLINE); return KTypes.RBRACK; }
{SEMI} { yybegin(INLINE); return KTypes.SEMI; }
{COLON} { yybegin(INLINE); return KTypes.COLON; }
{DOT} { yybegin(INLINE); return KTypes.DOT; }
{MINUS} { yybegin(INLINE); return KTypes.MINUS; }
{PRIM} { yybegin(INLINE); return KTypes.PRIM; }
// Keywords come before {IDENT} so they win the equal-length tie; unlike the
// other tokens they switch back to YYINITIAL.
{IF} { yybegin(YYINITIAL); return KTypes.IF; }
{DO} { yybegin(YYINITIAL); return KTypes.DO; }
{WHILE} { yybegin(YYINITIAL); return KTypes.WHILE; }
// Names and literals: also switch to INLINE.
{BUILTIN} { yybegin(INLINE); return KTypes.BUILTIN; }
{IDENT} { yybegin(INLINE); return KTypes.IDENT; }
{STRING} { yybegin(INLINE); return KTypes.STRING; }
{SYM} { yybegin(INLINE); return KTypes.SYM; }
{INT} { yybegin(INLINE); return KTypes.INT; }
{RAT} { yybegin(INLINE); return KTypes.RAT; }
// Adverbs leave the current state unchanged.
// NOTE(review): this rule precedes the COMMENT/COMMAND rules, so in YYINITIAL a
// "/:" or "\:" followed directly by a line break ties in length and is lexed as
// ADV rather than COMMENT/COMMAND — confirm that is intended.
{ADV} { return KTypes.ADV; }
// Bare "/" and "\" are adverbs only in INLINE.
<INLINE> "/" { return KTypes.ADV; }
<INLINE> "\\" { return KTypes.ADV; }
// In YYINITIAL the same characters start a comment or a command that runs to
// the end of the line (the yybegin here keeps the state it is already in).
<YYINITIAL> {COMMENT} { yybegin(YYINITIAL); return KTypes.COMMENT; }
<YYINITIAL> {COMMAND} { yybegin(YYINITIAL); return KTypes.COMMAND; }
// Line breaks and whitespace reset to YYINITIAL, so a following "/" or "\"
// starts a comment or command instead of being an adverb.
{NEWLINE} { yybegin(YYINITIAL); return KTypes.NEWLINE; }
{WHITESPACE} { yybegin(YYINITIAL); return TokenType.WHITE_SPACE; }
// Fallback for any single character no other rule matched; state is left unchanged.
. { return TokenType.BAD_CHARACTER; }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment