Skip to content

Instantly share code, notes, and snippets.

@rflechner
Last active January 15, 2016 11:22
Show Gist options
  • Save rflechner/60fc8a1074fb21cb5ff5 to your computer and use it in GitHub Desktop.
Save rflechner/60fc8a1074fb21cb5ff5 to your computer and use it in GitHub Desktop.
Small FSharp parsing toolkit
(**
Small parsing bases classes inspired by technique used to parse HTML in FSharp.Data.
(see https://github.com/fsharp/FSharp.Data/blob/master/src/Html/HtmlParser.fs#L226)
I liked this parsing strategy and it inspired me for other parsing algorithms.
*)
module ParsingBase
open System
open System.IO
type BufferedString =
{ mutable Content:string }
static member Empty = { Content=String.Empty }
member x.IsNullOrEmpty() = x.Content |> String.IsNullOrEmpty
member x.Chars i = x.Content.Chars i
member x.RemoveFirst i =
if i > x.Content.Length
then x.Clear()
else x.Content <- x.Content.Substring(i)
member x.Length with get() = x.Content.Length
member x.Clear() = x.Content <- String.Empty
member x.ToCharArray() = x.Content.ToCharArray()
member x.SetContent s = x.Content <- s
type SubBufferedTextReader (reader:TextReader) =
let buffer:BufferedString = BufferedString.Empty
member x.PeekChar() =
if buffer.IsNullOrEmpty()
then reader.Peek() |> char
else buffer.Chars 0
member x.ReadNChar n =
if buffer.IsNullOrEmpty()
then
let chars = Array.zeroCreate n
reader.ReadBlock(chars, 0, n) |> ignore
String(chars)
elif buffer.Length >= n then
let s = buffer.Content.Substring(0, n)
buffer.RemoveFirst n
s
else
let l = buffer.Length - n
let chars = Array.zeroCreate l
reader.ReadBlock(chars, 0, n) |> ignore
let s = buffer.Content
buffer.Clear()
s + String(chars)
member x.Peek() =
if buffer.IsNullOrEmpty()
then reader.Peek()
else buffer.Chars 0 |> int
member x.Read() = x.ReadNChar 1
member x.Pop() = x.Read() |> ignore
member x.Pop(count) =
[|0..(count-1)|] |> Array.map (fun _ -> x.ReadChar())
member x.ReadChar() =
if buffer.IsNullOrEmpty()
then x.Read() |> char
else
let c = buffer.Chars 0
buffer.RemoveFirst 1
c
member x.PeekNChar n =
if n <= 1
then [|x.PeekChar()|]
elif buffer.IsNullOrEmpty() then
let chars = Array.zeroCreate n
reader.ReadBlock(chars, 0, n) |> ignore
let s = String chars
buffer.SetContent s
let b = buffer.ToCharArray()
b
else
let l = n - buffer.Length
let chars = Array.zeroCreate l
reader.ReadBlock(chars, 0, l) |> ignore
buffer.SetContent (buffer.Content + String(chars))
buffer.ToCharArray()
type CharList =
{ mutable Contents : char list }
static member Empty = { Contents = [] }
override x.ToString() = String(x.Contents |> List.rev |> List.toArray)
member x.Acc c = x.Contents <- c :: x.Contents
member x.Length = x.Contents.Length
member x.Clear() = x.Contents <- []
type ParsingError =
{ Message:string }
[<AbstractClass>]
type StateBase<'t, 'c> (txt:TextReader, defaultContext:'c) =
let content : CharList ref = ref CharList.Empty
let tokens : 't list ref = ref List.Empty
let context : 'c ref = ref defaultContext
let reader : SubBufferedTextReader = SubBufferedTextReader(txt)
let errors : ParsingError list ref = ref List.Empty
let bag : string list ref = ref List.Empty
member x.Pop() = reader.Read() |> ignore
member x.Peek() = reader.PeekChar()
member x.PeekN n = reader.PeekNChar n
member x.Pop(count) =
[|0..(count-1)|] |> Array.map (fun _ -> reader.ReadChar()) |> ignore
member x.Contents = (!content).ToString().Trim()
member x.ContentLength = (!content).Length
member x.Acc() = (!content).Acc(reader.ReadChar())
member x.ClearContent() = content := CharList.Empty
member x.Emit (token:'t) =
tokens := token :: !tokens
x.ClearContent()
member x.SwithContext c = context := c
member x.EmitWith (f : unit -> 't) = x.Emit(f())
member x.Content with get () = !content
member x.Tokens with get () = !tokens |> List.rev
member x.Context with get () = !context
member x.Reader with get () = reader
member x.AddError e =
errors := e :: !errors
member x.Errors with get () = !errors
member x.ClearBag() = bag := List.Empty
member x.PushBag s = bag := s :: !bag
member x.Bag with get() = !bag
member x.ClearTokens() =
errors := List.Empty
type TokenizingResult<'t> =
{ Tokens: 't list
Errors : ParsingError list }
[<AbstractClass>]
type TokenizerBase<'t,'c,'s when 's :> StateBase<'t,'c>>(stream:Stream) =
let reader = new StreamReader(stream)
let state : 's option ref = ref None
abstract member CreateState : StreamReader -> 's
abstract member Accumulate : 's -> unit
abstract member StopParsing : 's -> bool
member private x.InitState() =
match !state with
| Some s -> s
| None ->
let s = x.CreateState reader
state := Some s
s
member x.Parse() =
let s = x.InitState()
let next = ref (s.Reader.Peek())
try
while !next > 0 && not (x.StopParsing s) do
x.Accumulate s
next := s.Reader.Peek()
with
| _ -> ()
{ Tokens = s.Tokens; Errors = s.Errors }
type ParsingResult<'t, 'e> =
| Success of model:'t
| Failure of errors:'e list
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment