Last active
August 29, 2015 14:07
-
-
Save flq/d17f239e6a7e122adbb0 to your computer and use it in GitHub Desktop.
Lexing and Parsing in F# on Cobol
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module Ast | |
open System | |
type slotDeclaration= { | |
Index : int | |
Name : string | |
Size : int | |
} | |
type dataSection= | |
| WorkStorage of slotDeclaration list | |
type configurationSectionParagraph()= class end | |
type configurationSection= { | |
ConfigSections: configurationSectionParagraph list | |
} | |
type inputOutputSection()= class end | |
type environmentDivision= { | |
Config: configurationSection | |
InOut: inputOutputSection | |
} | |
type identificationDivision= { | |
ProgramId : string | |
Author: string option | |
Installation: string option | |
DateWritten: DateTime option | |
DateCompiled: DateTime option | |
} | |
type op= | |
| Is | |
type condition= | |
| Comparison of (string * op * string) | |
type command= | |
| DisplayStatement of string | |
| FormatDisplayStatement of (string * string) | |
| AcceptStatement of string | |
| IfStatement of ifStatement | |
and ifStatement= { | |
Condition: condition | |
IfBranch: command list | |
ElseBranch: command list option | |
} | |
type cobolSourceProgram = { | |
IdentDiv : identificationDivision | |
EnvDiv : environmentDivision option | |
DataDiv : dataSection list option | |
ProcDiv : command list option | |
NestedCobolProg : cobolSourceProgram option | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
module TheLexer | |
open System | |
open Microsoft.FSharp.Text.Lexing | |
open TheParser | |
let lexeme = LexBuffer<_>.LexemeString | |
let cobolWords = | |
[ | |
"IDENTIFICATION DIVISION.", IDENTIFICATION; | |
"PROGRAM-ID.", PROGRAM_ID; | |
"AUTHOR.", AUTHOR; | |
"DATA DIVISION.", DATA; | |
"WORKING-STORAGE SECTION.", WORKING_STORAGE; | |
"PROCEDURE DIVISION.", PROCEDURE; | |
"STOP RUN.", STOP; | |
] |> Map.ofList | |
let commands = | |
[ | |
"DISPLAY", DISPLAY; | |
"ACCEPT", ACCEPT; | |
"IF", IF; | |
"END-IF", END_IF; | |
"IS", IS; | |
] |> Map.ofList | |
let doMatch (dict : Map<'a,'b>) str = | |
match dict.TryFind(str) with | |
| Some(token) -> token | |
| None -> ID(str) | |
let log (id : string) (str : string) = Console.WriteLine(id + " " + str) | |
} | |
let whitespace = [' ' '\t'] | |
let newline = "\n\r" | '\n' | '\r' | |
let digit = ['0'-'9'] | |
let dot = '.' | |
let comma = ',' | |
let lineTerminator = '.'newline | |
let comment = '*'_* | |
let identifierChar = ['A'-'Z' ' ' '-'] | |
let identifier = identifierChar+'.' | |
let commandChar = ['A'-'Z' '-'] | |
let command = commandChar+ | |
let freeLiteral = ['A'-'Z' 'a'-'z' '-'] | |
let anyNotQuote = [^'"'] | |
let literal = '"'anyNotQuote*'"' | |
let slotSizeBegin = "PIC "commandChar+'(' | |
let bracketClose = ')' | |
rule tokenize = parse | |
| identifier { log "identifier" (lexeme lexbuf); doMatch cobolWords (lexeme lexbuf) } | |
| command { log "command" (lexeme lexbuf); doMatch commands (lexeme lexbuf) } | |
| digit+ { log "digit+" (lexeme lexbuf); NUMBER(Convert.ToInt32(lexeme lexbuf)) } | |
| freeLiteral+ { log "freeLiteral+" (lexeme lexbuf); ID(lexeme lexbuf) } | |
| literal { log "literal" (lexeme lexbuf); LITERAL(lexeme lexbuf) } | |
| whitespace { log "_" (lexeme lexbuf); tokenize lexbuf } | |
| comment { log "comment" (lexeme lexbuf); tokenize lexbuf } | |
| dot { log "" (lexeme lexbuf); tokenize lexbuf } | |
| comma { log "" (lexeme lexbuf); tokenize lexbuf } | |
| slotSizeBegin { log "slotSizeBegin" (lexeme lexbuf); tokenize lexbuf } | |
| bracketClose { log "bracketClose" (lexeme lexbuf); tokenize lexbuf } | |
| lineTerminator { log "lineTerminator" (lexeme lexbuf); lexbuf.EndPos <- lexbuf.EndPos.NextLine; tokenize lexbuf; } | |
| newline { log "newline" (lexeme lexbuf); lexbuf.EndPos <- lexbuf.EndPos.NextLine; tokenize lexbuf } | |
| eof { log "eof" (lexeme lexbuf); EOF } | |
| _ { failwithf "unrecognized input: '%s'" <| lexeme lexbuf } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
%{ | |
open Ast | |
%} | |
%token <int> NUMBER | |
%token <string> ID | |
%token <string> LITERAL SLOT | |
%token IDENTIFICATION PROGRAM_ID AUTHOR EOF | |
%token PROCEDURE DATA WORKING_STORAGE | |
%token DISPLAY ACCEPT STOP IF END_IF | |
%token IS | |
%start start | |
%type <cobolSourceProgram> start | |
%% | |
start: | |
| identification procedures { | |
{ | |
IdentDiv = $1; | |
ProcDiv = Some($2); | |
EnvDiv = None; | |
DataDiv = None; | |
NestedCobolProg = None; | |
} | |
} | |
| identification data procedures { | |
{ | |
IdentDiv = $1; | |
ProcDiv = Some($3); | |
EnvDiv = None; | |
DataDiv = Some($2); | |
NestedCobolProg = None; | |
} | |
} | |
identification: | |
| IDENTIFICATION PROGRAM_ID ID AUTHOR ID { | |
{ | |
ProgramId = $3; | |
Author = Some($5); | |
Installation = None; | |
DateWritten = None; | |
DateCompiled = None; | |
} | |
} | |
| IDENTIFICATION AUTHOR ID PROGRAM_ID ID { | |
{ | |
ProgramId = $5; | |
Author = Some($3); | |
Installation = None; | |
DateWritten = None; | |
DateCompiled = None; | |
} | |
} | |
data: DATA sections { $2 } | |
sections: | |
| WORKING_STORAGE slotDeclarations { [WorkStorage($2)] } | |
slotDeclarations: | |
| slot { [$1] } | |
| slotDeclarations slot { $2 :: $1 } | |
slot: | |
| NUMBER ID NUMBER { { Index = $1; Name = $2; Size = $3; } } | |
procedures: PROCEDURE statements { $2 } | |
statements: | |
| command { [$1] } | |
| statements command { $1 @ [$2] } | |
op: IS { Is } | |
condition: | |
| ID op ID { Comparison($1, $2, $3) } | |
command: | |
| DISPLAY LITERAL { DisplayStatement($2) } | |
| DISPLAY LITERAL ID { FormatDisplayStatement($2, $3) } | |
| ACCEPT ID { AcceptStatement($2) } | |
| IF condition statements END_IF { IfStatement { Condition = $2; IfBranch = $3; ElseBranch = None } } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment