Created
May 8, 2020 10:59
-
-
Save 7shi/de072cd867f3899aa285bc9343110bff to your computer and use it in GitHub Desktop.
[F#] XML Parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
open System | |
open System.IO | |
open System.Text | |
/// Lookup table from a lower-case entity name (without the leading '&' and
/// trailing ';') to its replacement text.
///
/// The table is built from a compact "name,value;name,value;..." literal, so a
/// replacement value can contain neither ',' nor ';'.
/// The non-breaking space (nbsp) and soft hyphen (shy) are written as explicit
/// escapes because the raw characters are invisible and easily lost or
/// normalised by editors and copy/paste.
let entities =
    let table =
        "acirc,â;aelig,æ;agrave,à;amp,&;auml,ä;ccedil,ç;deg,°;eacute,é;" +
        "ecirc,ê;egrave,è;euml,ë;gt,>;hellip,…;iuml,ï;laquo,«;ldquo,“;" +
        "lsaquo,‹;lt,<;mdash,—;nbsp,\u00A0;ndash,–;ocirc,ô;oelig,œ;ordf,ª;" +
        "ordm,º;ouml,ö;quot,\";raquo,»;rdquo,”;rsaquo,›;rsquo,’;shy,\u00AD;" +
        "sup3,³;uacute,ú;uuml,ü"
    table.Split ';'
    |> Seq.map (fun pair ->
        // Each item is exactly "name,value".
        let fields = pair.Split(',')
        fields.[0], fields.[1])
    |> Map.ofSeq
/// Resolves a single entity reference such as "&amp;".
///
/// `src` must be the complete reference including the leading '&' and the
/// trailing ';'. Anything else (including null, or an unknown entity name) is
/// returned unchanged.
/// The name is looked up case-insensitively in `entities`; when `src` is not
/// already all lower-case (e.g. "&Eacute;" or "&EACUTE;"), the replacement
/// text is upper-cased.
let getEntity(src: string) =
    if isNull src || src.Length < 2 || src.[0] <> '&' || src.[src.Length - 1] <> ';' then src else
    // Invariant casing: entity names are ASCII identifiers, and culture-sensitive
    // casing (e.g. Turkish dotless i) would make names containing 'I' fail the lookup.
    let lowered = src.ToLowerInvariant()
    match entities.TryFind lowered.[1 .. lowered.Length - 2] with
    | Some replacement ->
        if src = lowered then replacement else replacement.ToUpperInvariant()
    | None -> src
/// Replaces every known entity reference ("&name;") in `src` with its
/// replacement text, as resolved by `getEntity`. Unknown references and stray
/// '&' characters are copied through unchanged. Null or empty input is
/// returned as-is.
let fromEntity(src: string) =
    if String.IsNullOrEmpty src then src else
    // Accumulate into a StringBuilder instead of repeated string concatenation.
    let sb = StringBuilder(src.Length)
    let rec loop pos =
        let amp = src.IndexOf('&', pos)
        let semi = if amp < 0 then -1 else src.IndexOf(';', amp)
        if semi < 0 then
            // No further complete "&...;" candidate: copy the remainder verbatim.
            sb.Append(src, pos, src.Length - pos) |> ignore
        else
            // Start the candidate at the LAST '&' before the ';' so that a stray
            // ampersand ("AT&T &amp; co") does not swallow the real entity after it.
            let start = src.LastIndexOf('&', semi)
            sb.Append(src, pos, start - pos) |> ignore
            sb.Append(getEntity (src.Substring(start, semi - start + 1))) |> ignore
            loop (semi + 1)
    loop 0
    sb.ToString()
/// Forward-only, lenient pull parser for XML/HTML-like markup.
///
/// Each successful `Read()` exposes one node: the text that preceded a tag
/// (`Text`, entity-decoded), the lower-cased tag name (`Tag`, with a leading
/// "/" for closing tags) and the tag's attributes via the indexer.
/// A self-closing tag ("<br/>") is reported as two nodes: "br" followed by a
/// synthesised "/br".
/// The underlying reader is disposed automatically when end of input is hit.
type XmlParser(s: TextReader) =
    let mutable stream: TextReader = s
    let mutable text = ""
    let mutable tag = ""
    // Closing tag queued by a self-closing element; delivered by the next Read().
    let mutable reserved = ""
    // Attributes of the current node; lower-cased name -> value.
    let values = new System.Collections.Generic.Dictionary<string, string>()
    // Last character code returned by ReadChar(), or -1 at end of input.
    let mutable current = 0

    /// The underlying reader, or null once it has been disposed.
    member this.Stream = stream
    /// Entity-decoded text that preceded the current tag.
    member this.Text = text
    /// Lower-cased name of the current tag ("" when there is none).
    member this.Tag = tag

    /// Creates a parser over an in-memory string.
    new(src) = new XmlParser(new StringReader(src))

    interface IDisposable with
        member this.Dispose() = this.Dispose()

    /// Disposes the underlying reader (if still open) and marks the parser as finished.
    member this.Dispose() =
        if not (isNull stream) then stream.Dispose()
        stream <- null

    /// Value of attribute `k` on the current node, or null when it is absent.
    member this.Item
        with get(k) =
            match values.TryGetValue k with
            | true, v -> v
            | _ -> null

    /// Advances to the next node. Returns false once the reader has been disposed.
    member this.Read() =
        text <- ""
        tag <- ""
        values.Clear()
        if isNull stream then false else
        if reserved <> "" then
            // Deliver the closing tag synthesised for a self-closing element.
            tag <- reserved
            reserved <- ""
        else
            this.ReadText()
        true

    /// Advances `n` nodes; returns true only if all `n` reads succeeded.
    member this.Read(n) =
        let mutable i = 0
        while i < n && this.Read() do
            i <- i + 1
        i = n

    /// Reads forward until a tag named `t` is found.
    member this.Search(t) = this.Search(t, (fun() -> true), "")
    /// Reads forward until a tag named `t` is found for which `f()` holds.
    member this.Search(t, f) = this.Search(t, f, "")
    /// Reads forward until a tag named `t` is found for which `f()` holds.
    /// Stops and returns false when input ends or when the tag "/" + `endTag` is
    /// reached; with an empty `endTag` the stop tag is "", i.e. a node without a tag.
    member this.Search(t, f: unit -> bool, endTag) =
        let e = if String.IsNullOrEmpty endTag then "" else "/" + endTag
        let mutable found = false
        while not found && this.Read() && tag <> e do
            if tag = t && f() then found <- true
        found

    /// Lazily yields the parser at each tag named `t` inside the current element.
    member this.SearchEach(t) = this.SearchEach(t, (fun() -> true), "")
    /// Lazily yields the parser at each tag named `t` for which `f()` holds.
    member this.SearchEach(t, f) = this.SearchEach(t, f, "")
    /// Lazily yields the parser at each tag named `t` for which `f()` holds,
    /// stopping at the closing tag of `endTag` (or, when `endTag` is empty, of the
    /// tag that is current when enumeration starts).
    member this.SearchEach(t, f: unit -> bool, endTag) = seq {
        let e = "/" + (if String.IsNullOrEmpty endTag then tag else endTag)
        while this.Read() && tag <> e do
            if tag = t && f() then yield this }

    /// Reads one character code into `current` and returns it; returns -1 at end
    /// of input and disposes the reader when the end is first reached.
    member this.ReadChar() =
        if isNull stream then
            current <- -1
        else
            current <- stream.Read()
            if current = -1 then this.Dispose()
        current

    /// Reads text up to the next '<' (or end of input) into `Text`, decoding
    /// entities, then parses the tag that follows, if any.
    member this.ReadText() =
        let t = StringBuilder()
        while this.ReadChar() <> -1 && current <> int '<' do
            ignore <| t.Append (char current)
        text <- fromEntity(t.ToString())
        if current = int '<' then this.ReadTag()

    /// Parses a tag after its '<' has been consumed: sets `Tag`, then reads either
    /// the comment body ("!--") or the attributes.
    member this.ReadTag() =
        let t = StringBuilder()
        // Collects the tag name and returns the character that terminated it.
        let rec loop() =
            if this.ReadChar() = -1 then char 0 else
            let ch = char current
            if ch = '>' || (ch = '/' && t.Length > 0) then
                ch
            elif ch > ' ' then
                ignore <| t.Append ch
                // "<!--" starts a comment even without whitespace after it.
                if t.Length = 3 && t.ToString() = "!--" then
                    ch else loop()
            elif t.Length > 0 then ch else loop()
        let mutable ch = loop()
        // Invariant casing so tag comparisons do not depend on the current culture.
        tag <- t.ToString().ToLowerInvariant()
        if ch = '/' then
            // Self-closing element: queue the matching closing tag.
            reserved <- "/" + tag
            ch <- char(this.ReadChar())
        if ch <> '>' then
            if tag = "!--" then
                this.ReadComment()
            else
                while this.ReadAttribute() do ()

    /// Reads a comment body up to the terminating "-->" and stores it (without
    /// the trailing "--") under the attribute key "comment".
    member this.ReadComment() =
        let cm = StringBuilder()
        // m = number of consecutive '-' characters just read.
        let rec loop m =
            if this.ReadChar() = -1 then () else
            let ch = char current
            if ch = '>' && m >= 2 then
                cm.Length <- cm.Length - 2
            else
                ignore <| cm.Append ch
                loop (if ch = '-' then m + 1 else 0)
        loop 0
        values.["comment"] <- cm.ToString()

    /// Reads one attribute into the attribute table. Returns true while more
    /// attributes may follow, false once the tag's '>' (or end of input) is reached.
    member this.ReadAttribute() =
        let rec loop() =
            let nm = this.ReadValue(true)
            let ch = char current
            if ch = '/' then
                reserved <- "/" + tag
                // Keep a name read just before the '/' (e.g. <input disabled/>)
                // instead of discarding it.
                if isNull nm then loop() else ch, nm
            else ch, nm
        let ch, nm = loop()
        if isNull nm then false else
        values.[nm.ToLowerInvariant()] <- if ch = '=' then this.ReadValue(false) else ""
        current <> int '>'

    /// Reads an attribute name (`isLeft` = true; also stops at '=' and '/') or an
    /// attribute value (`isLeft` = false). Quoted tokens are read up to the
    /// matching quote. Returns null when nothing was read.
    member this.ReadValue(isLeft) =
        let v = StringBuilder()
        let rec loop() =
            if this.ReadChar() = -1 then () else
            let ch = char current
            if ch = '>' || (isLeft && (ch = '=' || ch = '/')) then
                ()
            elif ch = '"' || ch = '\'' then
                let q = int ch
                while this.ReadChar() <> -1 && current <> q do
                    ignore <| v.Append (char current)
            elif ch > ' ' then
                ignore <| v.Append ch
                loop()
            elif v.Length > 0 then () else loop()
        loop()
        if v.Length = 0 then null else v.ToString()

    /// Lazily yields the parser positioned at each remaining node.
    member this.Nodes = seq {
        while this.Read() do yield this }

    /// Lazily yields the parser positioned at each node up to (excluding) the
    /// closing tag of the tag that is current when enumeration starts.
    member this.Children = seq {
        let e = "/" + this.Tag
        while this.Read() && this.Tag <> e do yield this }

    /// Lazily yields a running index (0, 1, 2, ...) for each child tag named `t`.
    /// NOTE(review): the counter is advanced only on matches; the original
    /// indentation was lost, so confirm it was not meant to count every child.
    member this.SearchIteri t = seq {
        let n = ref 0
        for _ in this.Children do
            if this.Tag = t then
                yield n.Value
                n.Value <- n.Value + 1 }
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment