Created
June 24, 2016 03:14
-
-
Save tangentstorm/61b8a539650a487b78e37b124dd29dcf to your computer and use it in GitHub Desktop.
Lexer and parser for the K language (part of IntelliK).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{
  // Generated parser and the base class of every generated PSI implementation.
  parserClass="com.x1010data.intellik.parser.KParser"
  extends="com.intellij.extapi.psi.ASTWrapperPsiElement"

  // Element/token type holder and the classes used to create those types.
  elementTypeHolderClass="com.x1010data.intellik.psi.KTypes"
  elementTypeClass="com.x1010data.intellik.psi.KElementType"
  tokenTypeClass="com.x1010data.intellik.psi.KTokenType"

  // Naming and packages of the generated PSI interfaces and implementations.
  psiClassPrefix="K"
  psiImplClassSuffix="Impl"
  psiPackage="com.x1010data.intellik.psi"
  psiImplPackage="com.x1010data.intellik.psi.impl"

  // NOTE(review): "KPsiImpUtil" may be a typo for "KPsiImplUtil" -- confirm against the actual class name.
  psiImplUtilClass="com.x1010data.intellik.psi.impl.KPsiImpUtil"
}
// Root rule. A file is a run of NEWLINE-terminated items. The optional tail
// accepts a final command or statement list (plus comment) that is NOT followed
// by NEWLINE, so a file without a trailing newline still parses -- the same
// allowance `block` makes for the last line before a closing bracket.
file      ::= item* (COMMAND | stmts)? COMMENT?
item      ::= (line | cmd | NEWLINE)
cmd       ::= COMMAND COMMENT? NEWLINE
line      ::= stmts? COMMENT? NEWLINE
stmts     ::= stmt (SEMI stmt)*
block     ::= item* stmts? // so we can have stuff on the last line, before the end brace/bracket/whatever
stmt      ::= ifStmt | doStmt | whileStmt | define | expr | // last is empty statement
define    ::= lvalue COLON expr

// Expressions: one or more juxtaposed terms. Alternatives are tried in order.
expr      ::= (amend | cond | value | op+ | LPAREN expr RPAREN | ADV)+
op        ::= (PRIM | BUILTIN | DOT | COLON | MINUS | IDENT) tail
value     ::= lvalue | ((lambda | noun) tail)
lvalue    ::= IDENT tail

// Braced lambda with an optional bracketed, SEMI-separated parameter list.
lambda    ::= LBRACE sig? block RBRACE
sig       ::= LBRACK (IDENT (SEMI IDENT)*)? RBRACK

// Literals. The lexer emits MINUS as its own token, so the sign is attached here.
noun      ::= STRING | (num+) | (SYM+) | list | dict
num       ::= MINUS? (INT | RAT)
list      ::= LPAREN block RPAREN
dict      ::= DOT LPAREN entry* RPAREN
entry     ::= LPAREN expr SEMI expr (SEMI expr?)? RPAREN

// Postfix forms that may follow an identifier, lambda, noun or operator.
tail      ::= (attr | index | ADV)*
attr      ::= DOT IDENT
index     ::= LBRACK block RBRACK

// Bracketed forms introduced by a leading token.
amend     ::= DOT LBRACK block RBRACK
cond      ::= COLON LBRACK block RBRACK
ifStmt    ::= IF LBRACK block RBRACK
doStmt    ::= DO LBRACK block RBRACK
whileStmt ::= WHILE LBRACK block RBRACK
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.x1010data.intellik; | |
import com.intellij.lexer.FlexLexer; | |
import com.intellij.psi.tree.IElementType; | |
import com.intellij.psi.TokenType; | |
import com.x1010data.intellik.psi.KTypes; | |
%%

// Scanner class shape: KLexer implements FlexLexer, and its scanning method is
// `IElementType advance()`.
%class KLexer
%implements FlexLexer
%function advance
%type IElementType

// Scan over the Unicode character set.
%unicode

// Code run once when end of input is reached: nothing to do.
%eof{
  return;
%eof}
// ---- character classes -----------------------------------------------------
Digit      = [0-9]
Alpha      = [a-zA-Z]
Nat        = {Digit}+

// ---- layout, comments and commands (each runs to the end of the line) ------
WHITESPACE = [\ \t\f]
NEWLINE    = \n|\r|\r\n
COMMENT    = "/" [^\r\n]*
COMMAND    = "\\"[^\r\n]*

// ---- punctuation -----------------------------------------------------------
SEMI       = ";"
COLON      = ":"
DOT        = "."
LPAREN     = "("
RPAREN     = ")"
LBRACE     = "{"
RBRACE     = "}"
LBRACK     = "["
RBRACK     = "]"
MINUS      = "-"

// ---- primitives and adverbs ------------------------------------------------
// [;:.\-] are primitives too, but they have their own token types above.
PRIM       = ([~!@#$%\^&*+<>|,?=_] ":"?) | ({Digit} ":")
ADV        = "'" | "':" | "/:" | "\\:"

// ---- numbers (unsigned here; the parser attaches a leading MINUS) ----------
INT        = {Nat}
RAT        = {INT}\.{Nat}

// ---- underscore-prefixed builtin names -------------------------------------
BUILTIN    = "_" ("bin"|"di"|"dv"|"dvl"|"draw"|"gtime"|"ic"|"ci"|"jd"|"dj"
|"lsq"|"dot"|"mul"|"inv"|"in"|"lin"|"sv"|"sm"|"ss"|"ssr"|"vs"
|"abs"|"floor"|"sin"|"cos"|"tan"|"sinh"|"cosh"|"tanh"
|"exp"|"log"|"sqr"|"sqrt")

// ---- keywords and identifiers ----------------------------------------------
IF         = "if"
DO         = "do"
WHILE      = "while"
IDENT      = {Alpha} ({Alpha}|{Digit}|"_")*

// ---- strings and symbols ---------------------------------------------------
// Earlier, simpler string definition kept for reference:
// STRING = "\"" ([\\].|[^\"])* "\""
// The closing quote is optional, so an unterminated string still yields a
// STRING token that stops at the end of the line.
ESCAPE     = \\[^\r\n]
STRING     = \"([^\\\"\r\n]|{ESCAPE}|(\\[\r\n]))*?(\"|\\)?
// A backquote, optionally followed by an identifier or a string.
SYM        = "`" ({IDENT} | {STRING})?
// Lexical states.
//   YYINITIAL (implicit) : "/" starts a COMMENT and "\" starts a COMMAND.
//   INLINE               : "/" and "\" are adverbs instead.
//   STRING               : declared, but no rule in this file enters it or is scoped to it.
%state INLINE, STRING
%%

// Rules without a <STATE> prefix are active in both YYINITIAL and INLINE.
// On a tie in match length the rule listed first wins, so order matters.

// Punctuation and primitives switch to INLINE, where a following "/" or "\"
// is read as an adverb rather than the start of a comment or command.
{LPAREN}     { yybegin(INLINE); return KTypes.LPAREN; }
{RPAREN}     { yybegin(INLINE); return KTypes.RPAREN; }
{LBRACE}     { yybegin(INLINE); return KTypes.LBRACE; }
{RBRACE}     { yybegin(INLINE); return KTypes.RBRACE; }
{LBRACK}     { yybegin(INLINE); return KTypes.LBRACK; }
{RBRACK}     { yybegin(INLINE); return KTypes.RBRACK; }
{SEMI}       { yybegin(INLINE); return KTypes.SEMI; }
{COLON}      { yybegin(INLINE); return KTypes.COLON; }
{DOT}        { yybegin(INLINE); return KTypes.DOT; }
{MINUS}      { yybegin(INLINE); return KTypes.MINUS; }
{PRIM}       { yybegin(INLINE); return KTypes.PRIM; }

// Keywords come before {IDENT} so they win the equal-length tie; unlike the
// other tokens they return to YYINITIAL.
{IF}         { yybegin(YYINITIAL); return KTypes.IF; }
{DO}         { yybegin(YYINITIAL); return KTypes.DO; }
{WHILE}      { yybegin(YYINITIAL); return KTypes.WHILE; }

// Names and literals also switch to INLINE.
{BUILTIN}    { yybegin(INLINE); return KTypes.BUILTIN; }
{IDENT}      { yybegin(INLINE); return KTypes.IDENT; }
{STRING}     { yybegin(INLINE); return KTypes.STRING; }
{SYM}        { yybegin(INLINE); return KTypes.SYM; }
{INT}        { yybegin(INLINE); return KTypes.INT; }
{RAT}        { yybegin(INLINE); return KTypes.RAT; }

// Adverbs leave the current state unchanged. A bare "/" or "\" is an adverb
// only in INLINE.
{ADV}        { return KTypes.ADV; }
<INLINE> "/"  { return KTypes.ADV; }
<INLINE> "\\" { return KTypes.ADV; }

// In YYINITIAL, "/" and "\" start a comment or command running to end of line.
// The state is already YYINITIAL here, so no yybegin is needed.
<YYINITIAL> {COMMENT} { return KTypes.COMMENT; }
<YYINITIAL> {COMMAND} { return KTypes.COMMAND; }

// Line breaks and blanks reset to YYINITIAL.
{NEWLINE}    { yybegin(YYINITIAL); return KTypes.NEWLINE; }
{WHITESPACE} { yybegin(YYINITIAL); return TokenType.WHITE_SPACE; }

// Catch-all. `[^]` matches ANY character, whereas `.` excludes the
// line-terminator class (e.g. U+000B, U+0085, U+2028, U+2029), which no other
// rule matches and which would otherwise abort scanning with a
// "could not match input" error.
[^]          { return TokenType.BAD_CHARACTER; }
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment