Skip to content

Instantly share code, notes, and snippets.

@loosechainsaw
Last active August 29, 2015 14:05
Show Gist options
  • Save loosechainsaw/421eff5b0430a6b0064f to your computer and use it in GitHub Desktop.
Save loosechainsaw/421eff5b0430a6b0064f to your computer and use it in GitHub Desktop.
namespace JsonParser
open System
open NUnit.Framework
module Json =
/// Lexical tokens emitted by `tokenizer`.
type Token =
/// Contents of a double-quoted string, without the surrounding quotes.
| String of string
/// Digit text of a number, kept as a string; a leading '-' is preserved.
| Number of string
/// The literal `null`.
| Null
/// The literal `true`.
| True
/// The literal `false`.
| False
/// The character '['.
| OpenBracket
/// The character ']'.
| CloseBracket
/// The character '('.
| OpenParen
/// The character ')'.
| CloseParen
/// The character '{'.
| OpenBrace
/// The character '}'.
| CloseBrace
/// The character ':'.
| Colon
/// The character ','.
| Comma
/// Partial active pattern: matches only ':' and yields the matched character.
let private (|MatchColon|_|) c =
    match c with
    | ':' -> Some(c)
    | _ -> None
/// Partial active pattern: matches only ',' and yields the matched character.
let private (|MatchComma|_|) c =
    match c with
    | ',' -> Some(c)
    | _ -> None
/// Partial active pattern: matches only the double-quote character and
/// yields the matched character.
let private (|MatchDoubleQuotes|_|) c =
    match c with
    | '"' -> Some(c)
    | _ -> None
/// Partial active pattern: matches only '+' and yields the matched character.
let private (|MatchPlus|_|) c =
    match c with
    | '+' -> Some(c)
    | _ -> None
/// Partial active pattern: matches only '-' and yields the matched character.
let private (|MatchMinus|_|) c =
    match c with
    | '-' -> Some(c)
    | _ -> None
/// Partial active pattern: matches only '(' and yields the matched character.
let private (|MatchOpenParen|_|) c =
    match c with
    | '(' -> Some(c)
    | _ -> None
/// Partial active pattern: matches only '}' and yields the matched character.
let private (|MatchCloseBrace|_|) c =
    match c with
    | '}' -> Some(c)
    | _ -> None
/// Partial active pattern: matches only '{' and yields the matched character.
let private (|MatchOpenBrace|_|) c =
    match c with
    | '{' -> Some(c)
    | _ -> None
/// Partial active pattern: matches only ')' and yields the matched character.
let private (|MatchCloseParen|_|) c =
    match c with
    | ')' -> Some(c)
    | _ -> None
/// Partial active pattern: matches only '[' and yields the matched character.
let private (|MatchOpenBracket|_|) c =
    match c with
    | '[' -> Some(c)
    | _ -> None
/// Partial active pattern: matches only ']' and yields the matched character.
let private (|MatchCloseBracket|_|) c =
    match c with
    | ']' -> Some(c)
    | _ -> None
/// Partial active pattern: matches the character written as '\"' and yields
/// it. The char literal '\"' denotes the plain double-quote character, so this
/// recognises a single '"', not a backslash followed by a quote.
let private (|MatchEscapedDoubleQuotes|_|) c =
    match c with
    | '\"' -> Some(c)
    | _ -> None
/// Partial active pattern: matches any character for which
/// Char.IsWhiteSpace is true. The payload is always the empty string.
let private (|MatchWhitespace|_|) c =
    match Char.IsWhiteSpace(c) with
    | true -> Some("")
    | false -> None
/// Partial active pattern: matches any character for which Char.IsDigit is
/// true and yields the matched character.
let private (|MatchDigit|_|) c =
    match Char.IsDigit(c) with
    | true -> Some(c)
    | false -> None
/// Scans the body of a string literal, starting just after the opening quote.
///
/// `s` is the remaining input and `acc` is the text collected so far.
/// Returns `(rest, text)`, where `rest` is the input following the closing
/// quote and `text` is the string contents.
///
/// A backslash immediately followed by a double quote is an escaped quote: it
/// contributes a single '"' to the text and does not end the string. No other
/// escape sequence is interpreted; those characters are copied through as-is.
///
/// If the input ends before a closing quote is found, the text collected so
/// far is returned together with the empty remainder.
let rec private scan_string s (acc: string) =
    match s with
    | [] -> (s, acc)
    // Escaped quote: consume exactly the backslash and the quote. The earlier
    // pattern also swallowed the character after the quote, silently dropping
    // one character of the string for every escape.
    | '\\' :: '"' :: t ->
        scan_string t (acc + "\"")
    // Unescaped quote terminates the literal.
    | '"' :: t ->
        (t, acc)
    | h :: t ->
        scan_string t (acc + string h)
/// Accumulates a run of digit characters from the front of `s` onto `acc`.
///
/// Scanning stops, without consuming the terminator, at end of input or at a
/// comma, colon, whitespace character, ']' or '}'. Returns `(rest, digits)`.
/// Any other character raises an exception with the message
/// "Unexpected character".
let rec private scan_digit (s:char list) (acc:string) =
    match s with
    | MatchDigit digit :: rest ->
        scan_digit rest (acc + string digit)
    | [] ->
        (s, acc)
    // All terminators behave the same: leave them in the input for the caller.
    | (MatchComma _ | MatchColon _ | MatchWhitespace _ | MatchCloseBracket _ | MatchCloseBrace _) :: _ ->
        (s, acc)
    | _ ->
        failwith "Unexpected character"
/// Maps the exact text "null", "true" or "false" to the corresponding token.
/// Any other text yields None.
let private literal_map input =
    if input = "null" then Some(Null)
    elif input = "true" then Some(True)
    elif input = "false" then Some(False)
    else None
/// Scans a bare word from the front of `s` and looks it up with `literal_map`.
///
/// `acc` is the text collected so far. Returns `(rest, token)`, where `token`
/// is None when the collected text is not a recognised literal.
///
/// The word ends at end of input, a comma, a colon, ']' or '}' (all left in
/// the input for the caller) or at a whitespace character (which is consumed).
/// Stopping at ']' and '}' matches `scan_digit`; without it a literal directly
/// followed by a closing bracket or brace, as in `[true]`, would be collected
/// as "true]" and rejected.
let rec private scan_other (s:char list) (acc:string) =
    match s with
    | [] -> (s, literal_map acc)
    | (MatchComma _ | MatchColon _ | MatchCloseBracket _ | MatchCloseBrace _) :: _ ->
        (s, literal_map acc)
    | MatchWhitespace _ :: t -> (t, literal_map acc)
    | h :: t -> scan_other t (acc + string h)
/// Splits JSON text into a list of tokens, in source order.
///
/// Whitespace between tokens is skipped. Punctuation characters map to their
/// own tokens, double-quoted text becomes a String token, and digit runs
/// become Number tokens (a leading '+' is dropped, a leading '-' is kept in
/// the number text). Anything else is scanned as a bare word and must be one
/// of the literals null, true or false.
///
/// Raises an exception (via failwith) when a bare word is not a recognised
/// literal, or when a number contains an unexpected character.
let tokenizer (input:string) =
    // Tokens are consed onto `acc` in reverse and flipped once at the end.
    let rec tokenizer_impl (input:char list) (acc: Token list) =
        match input with
        | [] -> acc
        | MatchWhitespace _ :: t -> tokenizer_impl t acc
        | MatchColon _ :: t ->
            tokenizer_impl t (Colon :: acc)
        | MatchComma _ :: t ->
            tokenizer_impl t (Comma :: acc)
        | MatchOpenParen _ :: t ->
            tokenizer_impl t (OpenParen :: acc)
        | MatchCloseParen _ :: t ->
            tokenizer_impl t (CloseParen :: acc)
        | MatchCloseBracket _ :: t ->
            tokenizer_impl t (CloseBracket :: acc)
        | MatchOpenBracket _ :: t ->
            tokenizer_impl t (OpenBracket :: acc)
        | MatchCloseBrace _ :: t ->
            tokenizer_impl t (CloseBrace :: acc)
        | MatchOpenBrace _ :: t ->
            tokenizer_impl t (OpenBrace :: acc)
        | MatchDoubleQuotes _ :: t ->
            let (rest, text) = scan_string t ""
            tokenizer_impl rest (Token.String(text) :: acc)
        | MatchDigit _ :: _ ->
            let (rest, digits) = scan_digit input ""
            tokenizer_impl rest (Token.Number(digits) :: acc)
        | MatchPlus _ :: MatchDigit h :: t ->
            let (rest, digits) = scan_digit (h :: t) ""
            tokenizer_impl rest (Token.Number(digits) :: acc)
        | MatchMinus _ :: MatchDigit h :: t ->
            let (rest, digits) = scan_digit (h :: t) "-"
            tokenizer_impl rest (Token.Number(digits) :: acc)
        | _ ->
            // Start from an empty accumulator. Seeding it with "-" (as the
            // minus branch above does) turned every word into "-null",
            // "-true", ... so no literal could ever be recognised.
            let (rest, literal) = scan_other input ""
            match literal with
            | None -> failwith "Expected a literal value but found none"
            | Some(v) -> tokenizer_impl rest (v :: acc)
    tokenizer_impl (List.ofArray (input.ToCharArray())) [] |> List.rev
/// Parsed JSON value produced by `parse`.
type Ast =
/// A string value.
| String of string
/// A number, kept as the text carried by the Number token (not converted).
| Number of string
/// `true` or `false`.
| Boolean of bool
/// The `null` literal.
| Null
/// An array of values.
| Array of Ast list
/// An object, as a list of (property name, value) pairs.
| Obj of (String * Ast) list
/// Result of parsing an empty token stream.
| Empty
/// Parses JSON text into an `Ast`.
///
/// The text is tokenized with `tokenizer` and then parsed by recursive
/// descent. Arrays and objects may nest to any depth, and both `[]` and `{}`
/// are accepted. Array elements and object properties are returned in source
/// order. Input that produces no tokens yields `Empty`.
///
/// Raises an exception (via failwith) when the token stream is not a single
/// well-formed value: an unexpected token, a trailing comma inside an array
/// or object, a missing closing bracket or brace, or extra tokens after the
/// top-level value. Exceptions raised by `tokenizer` propagate unchanged.
let parse (input:string) =
    // Every helper consumes a prefix of the token list and returns the node it
    // built together with the tokens that follow it. Threading the remainder
    // is what lets a value appear inside another value.
    let rec parse_value (tokens: Token list) : Ast * Token list =
        match tokens with
        | Token.String(v) :: rest -> (Ast.String(v), rest)
        | Token.Number(v) :: rest -> (Ast.Number(v), rest)
        | Token.Null :: rest -> (Ast.Null, rest)
        | True :: rest -> (Boolean(true), rest)
        | False :: rest -> (Boolean(false), rest)
        | OpenBracket :: CloseBracket :: rest -> (Array([]), rest)
        | OpenBracket :: rest -> parse_array rest []
        | OpenBrace :: CloseBrace :: rest -> (Obj([]), rest)
        | OpenBrace :: rest -> parse_object rest []
        | _ -> failwith "Invalid Json Token Stream"
    // Parses "value (, value)* ]"; `acc` holds the elements seen so far,
    // newest first.
    and parse_array (tokens: Token list) (acc: Ast list) : Ast * Token list =
        let (element, rest) = parse_value tokens
        match rest with
        | Comma :: CloseBracket :: _ -> failwith "Unexpected , "
        | Comma :: more -> parse_array more (element :: acc)
        | CloseBracket :: more -> (Array(List.rev (element :: acc)), more)
        | _ -> failwith "Unexpected token in stream"
    // Parses "name : value (, name : value)* }"; `acc` holds the members seen
    // so far, newest first.
    and parse_object (tokens: Token list) (acc: (string * Ast) list) : Ast * Token list =
        match tokens with
        | Token.String(property) :: Colon :: valueTokens ->
            let (value, rest) = parse_value valueTokens
            let members = (property, value) :: acc
            match rest with
            | Comma :: more -> parse_object more members
            | CloseBrace :: more -> (Obj(List.rev members), more)
            | _ -> failwith "Unexpected token in stream"
        | _ -> failwith "Unexpected token in stream"
    match tokenizer input with
    | [] -> Empty
    | tokens ->
        match parse_value tokens with
        | (ast, []) -> ast
        // Anything left after the top-level value is malformed input.
        | _ -> failwith "Invalid Json Token Stream"
/// NUnit tests for the Json module. Each test still prints the result for
/// inspection, and now also asserts on it so a wrong result fails the test.
module UnitTests =
    [<TestFixture>]
    module TokenizerTests =
        open Json

        /// A quoted string becomes a single String token without its quotes.
        [<Test>]
        let scan_string_test () =
            let text = @"""Hello World"""
            let token = tokenizer text
            printfn "%A" token
            // Double parentheses keep F# from reading `token = ...` as a
            // named argument.
            Assert.That((token = [ Token.String "Hello World" ]))

        /// A run of digits becomes a single Number token holding the digit text.
        [<Test>]
        let scan_number_test () =
            let text = "123455"
            let token = tokenizer text
            printfn "%A" token
            Assert.That((token = [ Token.Number "123455" ]))

        /// Brackets, commas and numbers are emitted in source order.
        [<Test>]
        let scan_array_test () =
            let text = "[1,2,3,4,5,66]"
            let token = tokenizer text
            printfn "%A" token
            let expected =
                [ OpenBracket
                  Token.Number "1"; Comma
                  Token.Number "2"; Comma
                  Token.Number "3"; Comma
                  Token.Number "4"; Comma
                  Token.Number "5"; Comma
                  Token.Number "66"
                  CloseBracket ]
            Assert.That((token = expected))

        /// A flat array parses to an Array node with its elements in order.
        [<Test>]
        let array_parse_example () =
            let text = "[1,2,3,4,5,66]"
            let ast = parse text
            printfn "%A" ast
            let expected =
                Ast.Array (List.map Ast.Number [ "1"; "2"; "3"; "4"; "5"; "66" ])
            Assert.That((ast = expected))

        /// A single-property object parses to an Obj node with one pair.
        [<Test>]
        let obj_example () =
            let text = @"{ ""Name"" : ""Blair Davidson"" }"
            let ast = parse text
            printfn "%A" ast
            let expected = Ast.Obj [ ("Name", Ast.String "Blair Davidson") ]
            Assert.That((ast = expected))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment