Skip to content

Instantly share code, notes, and snippets.

@flq
Last active August 29, 2015 14:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save flq/d17f239e6a7e122adbb0 to your computer and use it in GitHub Desktop.
Save flq/d17f239e6a7e122adbb0 to your computer and use it in GitHub Desktop.
Lexing and Parsing in F# on Cobol
module Ast
open System
type slotDeclaration= {
Index : int
Name : string
Size : int
}
type dataSection=
| WorkStorage of slotDeclaration list
type configurationSectionParagraph()= class end
type configurationSection= {
ConfigSections: configurationSectionParagraph list
}
type inputOutputSection()= class end
type environmentDivision= {
Config: configurationSection
InOut: inputOutputSection
}
type identificationDivision= {
ProgramId : string
Author: string option
Installation: string option
DateWritten: DateTime option
DateCompiled: DateTime option
}
type op=
| Is
type condition=
| Comparison of (string * op * string)
type command=
| DisplayStatement of string
| FormatDisplayStatement of (string * string)
| AcceptStatement of string
| IfStatement of ifStatement
and ifStatement= {
Condition: condition
IfBranch: command list
ElseBranch: command list option
}
type cobolSourceProgram = {
IdentDiv : identificationDivision
EnvDiv : environmentDivision option
DataDiv : dataSection list option
ProcDiv : command list option
NestedCobolProg : cobolSourceProgram option
}
{
module TheLexer
open System
open Microsoft.FSharp.Text.Lexing
open TheParser
let lexeme = LexBuffer<_>.LexemeString
let cobolWords =
[
"IDENTIFICATION DIVISION.", IDENTIFICATION;
"PROGRAM-ID.", PROGRAM_ID;
"AUTHOR.", AUTHOR;
"DATA DIVISION.", DATA;
"WORKING-STORAGE SECTION.", WORKING_STORAGE;
"PROCEDURE DIVISION.", PROCEDURE;
"STOP RUN.", STOP;
] |> Map.ofList
let commands =
[
"DISPLAY", DISPLAY;
"ACCEPT", ACCEPT;
"IF", IF;
"END-IF", END_IF;
"IS", IS;
] |> Map.ofList
let doMatch (dict : Map<'a,'b>) str =
match dict.TryFind(str) with
| Some(token) -> token
| None -> ID(str)
let log (id : string) (str : string) = Console.WriteLine(id + " " + str)
}
let whitespace = [' ' '\t']
let newline = "\n\r" | '\n' | '\r'
let digit = ['0'-'9']
let dot = '.'
let comma = ','
let lineTerminator = '.'newline
let comment = '*'_*
let identifierChar = ['A'-'Z' ' ' '-']
let identifier = identifierChar+'.'
let commandChar = ['A'-'Z' '-']
let command = commandChar+
let freeLiteral = ['A'-'Z' 'a'-'z' '-']
let anyNotQuote = [^'"']
let literal = '"'anyNotQuote*'"'
let slotSizeBegin = "PIC "commandChar+'('
let bracketClose = ')'
rule tokenize = parse
| identifier { log "identifier" (lexeme lexbuf); doMatch cobolWords (lexeme lexbuf) }
| command { log "command" (lexeme lexbuf); doMatch commands (lexeme lexbuf) }
| digit+ { log "digit+" (lexeme lexbuf); NUMBER(Convert.ToInt32(lexeme lexbuf)) }
| freeLiteral+ { log "freeLiteral+" (lexeme lexbuf); ID(lexeme lexbuf) }
| literal { log "literal" (lexeme lexbuf); LITERAL(lexeme lexbuf) }
| whitespace { log "_" (lexeme lexbuf); tokenize lexbuf }
| comment { log "comment" (lexeme lexbuf); tokenize lexbuf }
| dot { log "" (lexeme lexbuf); tokenize lexbuf }
| comma { log "" (lexeme lexbuf); tokenize lexbuf }
| slotSizeBegin { log "slotSizeBegin" (lexeme lexbuf); tokenize lexbuf }
| bracketClose { log "bracketClose" (lexeme lexbuf); tokenize lexbuf }
| lineTerminator { log "lineTerminator" (lexeme lexbuf); lexbuf.EndPos <- lexbuf.EndPos.NextLine; tokenize lexbuf; }
| newline { log "newline" (lexeme lexbuf); lexbuf.EndPos <- lexbuf.EndPos.NextLine; tokenize lexbuf }
| eof { log "eof" (lexeme lexbuf); EOF }
| _ { failwithf "unrecognized input: '%s'" <| lexeme lexbuf }
%{
open Ast
%}
%token <int> NUMBER
%token <string> ID
%token <string> LITERAL SLOT
%token IDENTIFICATION PROGRAM_ID AUTHOR EOF
%token PROCEDURE DATA WORKING_STORAGE
%token DISPLAY ACCEPT STOP IF END_IF
%token IS
%start start
%type <cobolSourceProgram> start
%%
start:
| identification procedures {
{
IdentDiv = $1;
ProcDiv = Some($2);
EnvDiv = None;
DataDiv = None;
NestedCobolProg = None;
}
}
| identification data procedures {
{
IdentDiv = $1;
ProcDiv = Some($3);
EnvDiv = None;
DataDiv = Some($2);
NestedCobolProg = None;
}
}
identification:
| IDENTIFICATION PROGRAM_ID ID AUTHOR ID {
{
ProgramId = $3;
Author = Some($5);
Installation = None;
DateWritten = None;
DateCompiled = None;
}
}
| IDENTIFICATION AUTHOR ID PROGRAM_ID ID {
{
ProgramId = $5;
Author = Some($3);
Installation = None;
DateWritten = None;
DateCompiled = None;
}
}
data: DATA sections { $2 }
sections:
| WORKING_STORAGE slotDeclarations { [WorkStorage($2)] }
slotDeclarations:
| slot { [$1] }
| slotDeclarations slot { $2 :: $1 }
slot:
| NUMBER ID NUMBER { { Index = $1; Name = $2; Size = $3; } }
procedures: PROCEDURE statements { $2 }
statements:
| command { [$1] }
| statements command { $1 @ [$2] }
op: IS { Is }
condition:
| ID op ID { Comparison($1, $2, $3) }
command:
| DISPLAY LITERAL { DisplayStatement($2) }
| DISPLAY LITERAL ID { FormatDisplayStatement($2, $3) }
| ACCEPT ID { AcceptStatement($2) }
| IF condition statements END_IF { IfStatement { Condition = $2; IfBranch = $3; ElseBranch = None } }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment