Skip to content

Instantly share code, notes, and snippets.

@7shi
Created May 8, 2020 10:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save 7shi/de072cd867f3899aa285bc9343110bff to your computer and use it in GitHub Desktop.
Save 7shi/de072cd867f3899aa285bc9343110bff to your computer and use it in GitHub Desktop.
[F#] XML Parser
open System
open System.IO
open System.Text
/// Lookup table from lower-case entity name (without the surrounding '&' and
/// ';') to the text it stands for.
///
/// Invisible characters (no-break space, soft hyphen) are written as \u
/// escapes so that editors, copy/paste or re-encoding cannot silently turn
/// them into something else. The table includes the five entities predefined
/// by XML (amp, lt, gt, quot, apos).
let entities =
    [ "acirc", "â"; "aelig", "æ"; "agrave", "à"; "amp", "&"
      "apos", "'"; "auml", "ä"; "ccedil", "ç"; "deg", "°"
      "eacute", "é"; "ecirc", "ê"; "egrave", "è"; "euml", "ë"
      "gt", ">"; "hellip", "…"; "iuml", "ï"; "laquo", "«"
      "ldquo", "“"; "lsaquo", "‹"; "lsquo", "‘"; "lt", "<"
      "mdash", "—"; "nbsp", "\u00A0"; "ndash", "–"; "ocirc", "ô"
      "oelig", "œ"; "ordf", "ª"; "ordm", "º"; "ouml", "ö"
      "quot", "\""; "raquo", "»"; "rdquo", "”"; "rsaquo", "›"
      "rsquo", "’"; "shy", "\u00AD"; "sup3", "³"; "uacute", "ú"
      "uuml", "ü" ]
    |> Map.ofList
/// Decodes a single entity reference of the form "&name;", "&#123;" or
/// "&#x7B;".
///
/// Named entities are looked up case-insensitively in `entities`; when the
/// reference is not written entirely in lower case the replacement is
/// upper-cased (so "&Eacute;" yields "É"). Numeric references are decoded to
/// the corresponding Unicode character when they denote a valid, non-zero,
/// non-surrogate code point.
///
/// Anything that is not a recognised reference (including null) is returned
/// unchanged.
let getEntity(src: string) =
    if isNull src || src.Length < 3 || src.[0] <> '&' || src.[src.Length - 1] <> ';' then
        src
    else
        // Invariant casing: culture-sensitive ToLower() would turn 'I' into a
        // dotless 'ı' under e.g. a Turkish culture and break the lookup.
        let lower = src.ToLowerInvariant()
        let name = lower.Substring(1, lower.Length - 2)
        let invariant = System.Globalization.CultureInfo.InvariantCulture
        let codePoint =
            if name.Length > 2 && name.[0] = '#' && name.[1] = 'x' then
                match Int32.TryParse(name.Substring(2),
                                     System.Globalization.NumberStyles.AllowHexSpecifier,
                                     invariant) with
                | true, v -> Some v
                | _ -> None
            elif name.Length > 1 && name.[0] = '#' then
                match Int32.TryParse(name.Substring(1),
                                     System.Globalization.NumberStyles.None,
                                     invariant) with
                | true, v -> Some v
                | _ -> None
            else
                None
        match codePoint with
        | Some cp ->
            // Char.ConvertFromUtf32 throws on out-of-range or surrogate
            // values, so those references are left as written.
            let valid = cp > 0 && cp <= 0x10FFFF && (cp < 0xD800 || cp > 0xDFFF)
            if valid then Char.ConvertFromUtf32 cp else src
        | None ->
            match entities.TryFind name with
            | Some value -> if src = lower then value else value.ToUpperInvariant()
            | None -> src
/// Replaces every entity reference in `src` with its decoded text, using
/// `getEntity` for each "&...;" candidate. References that `getEntity` does
/// not recognise are kept as written. Null or empty input is returned as is.
///
/// A candidate always starts at the LAST '&' before the terminating ';', so a
/// stray ampersand does not swallow a following real entity:
/// "a & b &amp; c" becomes "a & b & c".
let fromEntity(src: string) =
    if String.IsNullOrEmpty src then
        src
    else
        // One builder instead of repeated string concatenation / substring
        // copies keeps decoding linear in the input length.
        let sb = StringBuilder(src.Length)
        let rec loop (pos: int) =
            let amp = src.IndexOf('&', pos)
            let semi = if amp < 0 then -1 else src.IndexOf(';', amp)
            if semi < 0 then
                // No further complete "&...;" span: copy the remainder.
                sb.Append(src, pos, src.Length - pos) |> ignore
            else
                // amp <= semi and src.[amp] = '&', so this is always >= amp.
                let start = src.LastIndexOf('&', semi)
                sb.Append(src, pos, start - pos) |> ignore
                sb.Append(getEntity (src.Substring(start, semi - start + 1))) |> ignore
                loop (semi + 1)
        loop 0
        sb.ToString()
/// Forward-only, lenient pull parser for XML/HTML-like markup.
///
/// Each successful `Read()` exposes one node:
///   * `Text` - the text that preceded the next tag, with entities decoded
///     through `fromEntity`;
///   * `Tag`  - the lower-cased tag name; closing tags keep their leading '/';
///   * the indexer - the attributes of that tag, keyed by lower-cased name.
///
/// A self-closing tag such as `<br/>` is reported as "br" and then, on the
/// following `Read()`, as a synthetic "/br". A comment is reported as tag
/// "!--" with its body stored under the "comment" key. Attribute values are
/// stored as written: entities inside them are NOT decoded.
///
/// The underlying reader is disposed as soon as end of input is reached.
type XmlParser(s: TextReader) =
    let mutable stream: TextReader = s
    let mutable text = ""
    let mutable tag = ""
    // Synthetic closing tag ("/name") to be reported by the next Read().
    let mutable reserved = ""
    let values = System.Collections.Generic.Dictionary<string, string>()
    // Last character code returned by ReadChar, or -1 at end of input.
    let mutable current = 0
    // Lower-cased name of the attribute stored most recently for the current
    // tag; lets "name = value" (whitespace before '=') attach its value.
    let mutable lastAttr: string = null

    // An empty quoted value is reported by ReadValue as null; attributes
    // always store a string so that `a=""` and a bare `a` look the same.
    let orEmpty (v: string) = if isNull v then "" else v

    /// The underlying reader, or null once it has been disposed.
    member this.Stream = stream

    /// Decoded text that preceded the current tag.
    member this.Text = text

    /// Lower-cased name of the current tag ("" when the node has no tag).
    member this.Tag = tag

    /// Creates a parser over an in-memory string.
    new(src: string) = new XmlParser(new StringReader(src))

    interface IDisposable with
        member this.Dispose() = this.Dispose()

    /// Disposes the underlying reader (if still open). Safe to call twice.
    member this.Dispose() =
        if not (isNull stream) then stream.Dispose()
        stream <- null

    /// Value of attribute `k` on the current tag, or null when absent.
    member this.Item
        with get (k: string) : string =
            match values.TryGetValue k with
            | true, v -> v
            | _ -> null

    /// Advances to the next node. Returns false once the reader has been
    /// disposed (end of input was reached earlier, or Dispose was called).
    member this.Read() : bool =
        text <- ""
        tag <- ""
        values.Clear()
        if isNull stream then
            false
        else
            if reserved <> "" then
                // Second half of a self-closing tag.
                tag <- reserved
                reserved <- ""
            else
                this.ReadText()
            true

    /// Advances `n` nodes; true only if all `n` reads succeeded.
    member this.Read(n: int) : bool =
        let mutable i = 0
        while i < n && this.Read() do
            i <- i + 1
        i = n

    /// Same as Search(t, f, endTag) with no predicate and no end tag.
    member this.Search(t: string) : bool = this.Search(t, (fun () -> true), "")

    /// Same as Search(t, f, endTag) with no end tag.
    member this.Search(t: string, f: unit -> bool) : bool = this.Search(t, f, "")

    /// Reads forward until a node whose tag equals `t` and for which `f()`
    /// returns true; the parser is left positioned on that node.
    /// Returns false when input ends first, or when the stop tag is reached:
    /// "/" + endTag, or - when endTag is null/empty - a node with an empty tag.
    member this.Search(t: string, f: unit -> bool, endTag: string) : bool =
        let stopTag = if String.IsNullOrEmpty endTag then "" else "/" + endTag
        let rec scan () =
            if this.Read() && tag <> stopTag then
                if tag = t && f () then true else scan ()
            else
                false
        scan ()

    /// Same as SearchEach(t, f, endTag) with no predicate and no end tag.
    member this.SearchEach(t: string) = this.SearchEach(t, (fun () -> true), "")

    /// Same as SearchEach(t, f, endTag) with no end tag.
    member this.SearchEach(t: string, f: unit -> bool) = this.SearchEach(t, f, "")

    /// Lazily yields the parser each time it reaches a node whose tag equals
    /// `t` and for which `f()` is true, stopping at "/" + endTag. When endTag
    /// is null/empty the closing tag of the node current at the moment
    /// enumeration starts is used instead.
    member this.SearchEach(t: string, f: unit -> bool, endTag: string) = seq {
        let stopTag = "/" + (if String.IsNullOrEmpty endTag then tag else endTag)
        while this.Read() && tag <> stopTag do
            if tag = t && f () then yield this }

    /// Reads one character into `current` and returns it; -1 at end of
    /// input. Reaching end of input disposes the reader.
    member this.ReadChar() : int =
        if isNull stream then
            current <- -1
        else
            current <- stream.Read()
            if current = -1 then this.Dispose()
        current

    /// Reads text up to the next '<' (or end of input) into `Text`, then
    /// parses the tag that follows, if any.
    member this.ReadText() : unit =
        let buf = StringBuilder()
        while this.ReadChar() <> -1 && current <> int '<' do
            buf.Append(char current) |> ignore
        text <- fromEntity (buf.ToString())
        if current = int '<' then this.ReadTag()

    /// Parses a tag whose '<' has already been consumed: name, then either
    /// the comment body or the attributes.
    member this.ReadTag() : unit =
        let name = StringBuilder()
        lastAttr <- null
        // Returns the character that ended the name (char 0 at end of input).
        let rec readName () =
            if this.ReadChar() = -1 then
                char 0
            else
                let ch = char current
                if ch = '>' || (ch = '/' && name.Length > 0) then
                    ch
                elif ch > ' ' then
                    name.Append(ch) |> ignore
                    // "<!--" starts a comment even without a following space.
                    if name.Length = 3 && name.ToString() = "!--" then ch else readName ()
                elif name.Length > 0 then
                    ch
                else
                    readName ()
        let mutable ch = readName ()
        // Invariant casing so tag names do not depend on the current culture.
        tag <- name.ToString().ToLowerInvariant()
        if ch = '/' then
            reserved <- "/" + tag
            ch <- char (this.ReadChar())
        if ch <> '>' then
            if tag = "!--" then
                this.ReadComment()
            else
                while this.ReadAttribute() do ()

    /// Reads a comment body up to the closing "-->" and stores it (without
    /// the trailing "--") under the "comment" key.
    member this.ReadComment() : unit =
        let body = StringBuilder()
        // `dashes` counts the consecutive '-' characters just read.
        let rec loop (dashes: int) =
            if this.ReadChar() <> -1 then
                let ch = char current
                if ch = '>' && dashes >= 2 then
                    body.Length <- body.Length - 2
                else
                    body.Append(ch) |> ignore
                    loop (if ch = '-' then dashes + 1 else 0)
        loop 0
        values.["comment"] <- body.ToString()

    /// Reads the next attribute of the current tag into the attribute table.
    /// Returns true while more attributes may follow, false at the end of
    /// the tag (or of the input). A '/' marks the tag as self-closing.
    member this.ReadAttribute() : bool =
        let rec readName () =
            let nm: string = this.ReadValue(true)
            let ch = char current
            if ch = '/' then
                reserved <- "/" + tag
                // Keep a valueless attribute written right before '/',
                // e.g. <input disabled/>.
                if not (isNull nm) then
                    lastAttr <- nm.ToLowerInvariant()
                    values.[lastAttr] <- ""
                readName ()
            else
                ch, nm
        let ch, nm = readName ()
        if isNull nm then
            if ch = '=' && not (isNull lastAttr) then
                // "name = value": the name was stored by the previous call
                // and only the '=' was found now; attach the value to it.
                values.[lastAttr] <- orEmpty (this.ReadValue(false))
                lastAttr <- null
                current <> int '>'
            else
                false
        else
            lastAttr <- nm.ToLowerInvariant()
            values.[lastAttr] <- (if ch = '=' then orEmpty (this.ReadValue(false)) else "")
            current <> int '>'

    /// Reads one token inside a tag: an attribute name (`isLeft` = true,
    /// also terminated by '=' or '/') or an attribute value. Leading
    /// whitespace is skipped; a quoted section is read up to the matching
    /// quote and ends the token. Returns null when nothing was read.
    member this.ReadValue(isLeft: bool) : string =
        let buf = StringBuilder()
        let rec loop () =
            if this.ReadChar() <> -1 then
                let ch = char current
                if ch = '>' || (isLeft && (ch = '=' || ch = '/')) then
                    ()
                elif ch = '"' || ch = '\'' then
                    let quote = int ch
                    while this.ReadChar() <> -1 && current <> quote do
                        buf.Append(char current) |> ignore
                elif ch > ' ' then
                    buf.Append(ch) |> ignore
                    loop ()
                elif buf.Length = 0 then
                    loop ()
        loop ()
        if buf.Length = 0 then null else buf.ToString()

    /// Lazily yields the parser after every successful Read().
    member this.Nodes = seq {
        while this.Read() do yield this }

    /// Lazily yields the parser for every node up to the closing tag of the
    /// node that is current when enumeration starts.
    member this.Children = seq {
        let stopTag = "/" + this.Tag
        while this.Read() && this.Tag <> stopTag do yield this }

    /// Lazily yields the zero-based position, among all nodes produced by
    /// `Children`, of each node whose tag equals `t`.
    member this.SearchIteri t = seq {
        let index = ref 0
        for _ in this.Children do
            if this.Tag = t then
                yield index.Value
            index.Value <- index.Value + 1 }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment