Last active
August 29, 2015 14:03
-
-
Save 7shi/5845e4a899aa48810509 to your computer and use it in GitHub Desktop.
[F#]簡易 XML Parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
open System
open System.IO
open System.Text
/// Decodes the character entities this parser understands
/// (`&lt;`, `&gt;`, `&quot;`, `&nbsp;`, `&amp;`) in `s` and returns the result.
///
/// `&amp;` is replaced last so that an escaped entity such as "&amp;lt;"
/// decodes to the literal text "&lt;" instead of being decoded twice.
/// NOTE(review): the entity names were reconstructed from a copy of this file
/// in which they had been HTML-decoded; `&nbsp;` is mapped to a plain space
/// here -- confirm that U+00A0 was not intended.
let fromEntity(s: string) =
    s.Replace("&lt;", "<")
     .Replace("&gt;", ">")
     .Replace("&quot;", "\"")
     .Replace("&nbsp;", " ")
     .Replace("&amp;", "&")
/// Minimal forward-only, pull-style reader for XML/HTML-like markup.
///
/// Every successful `Read()` exposes:
///   * `Text` - the entity-decoded text that preceded the tag,
///   * `Tag`  - the lower-cased tag name (closing tags keep their leading "/"),
///   * the indexer - the attributes of that tag, keyed by lower-cased name.
/// A self-closing element such as `<br/>` is reported as two nodes, "br" and
/// then "/br". A comment is reported with tag "!--" and its body stored under
/// the key "comment".
type XmlParser(s: TextReader) =
    // Input; becomes null once the end was reached or Dispose() was called.
    let mutable stream: TextReader = s
    let mutable text = ""
    let mutable tag = ""
    // Closing tag ("/name") queued by a self-closing element for the next Read().
    let mutable reserved = ""
    // Attributes of the current tag.
    let mutable values = new System.Collections.Generic.Dictionary<string, string>()
    // Last value returned by ReadChar(); -1 at end of input.
    let mutable current = 0

    /// The underlying reader, or null once the parser is finished.
    member this.Stream = stream
    /// Text that preceded the current tag.
    member this.Text = text
    /// Name of the current tag ("" when only text was read).
    member this.Tag = tag

    /// Creates a parser over an in-memory string.
    new(src: string) = new XmlParser(new StringReader(src))

    interface IDisposable with
        member this.Dispose() = this.Dispose()

    /// Disposes the underlying reader if it is still open; safe to call twice.
    member this.Dispose() =
        if stream <> null then stream.Dispose()
        stream <- null

    /// Value of attribute `k` on the current tag, or null when it is absent.
    member this.Item
        with get(k: string) =
            let result, r = values.TryGetValue k
            if result then r else null

    /// Advances to the next node. Returns false when the input is exhausted.
    member this.Read() : bool =
        text <- ""
        tag <- ""
        values.Clear()
        if stream = null then
            false
        else
            if reserved <> "" then
                // Second half of a self-closing element: report "/name".
                tag <- reserved
                reserved <- ""
            else
                this.ReadText()
            // When ReadText() hit the end of input without reading anything
            // the stream has been disposed and both fields are empty; report
            // end-of-input instead of a phantom empty node.
            stream <> null || text <> "" || tag <> ""

    /// Advances until a tag named `t` is found; false if the input ends first.
    member this.Search(t: string) : bool = this.Search(t, fun () -> true)

    /// Advances until a tag named `t` is found for which `f()` returns true;
    /// false if the input ends first.
    member this.Search(t: string, f: unit -> bool) : bool =
        let rec loop() =
            if not (this.Read()) then false
            elif tag = t && f() then true
            else loop()
        loop()

    /// Reads nodes until the closing tag of the element that is current when
    /// the call is made (or the end of input), invoking `a()` for every node
    /// whose tag equals `t` and for which `f()` returns true.
    member this.SearchEach(t: string, f: unit -> bool, a: unit -> unit) =
        let e = "/" + tag
        while this.Read() && tag <> e do
            if tag = t && f() then a()

    /// Reads one character into `current` and returns it; -1 at end of input.
    /// Reaching the end disposes the stream.
    member this.ReadChar() : int =
        if stream = null then
            current <- -1
        else
            current <- stream.Read()
            if current = -1 then this.Dispose()
        current

    /// Reads text up to the next '<' (or end of input) into `Text`, then
    /// parses the tag that follows, if any.
    member this.ReadText() : unit =
        let t = new StringBuilder()
        while this.ReadChar() <> -1 && current <> int '<' do
            ignore <| t.Append (char current)
        text <- fromEntity(t.ToString())
        if current = int '<' then this.ReadTag()

    /// Parses a tag name (the '<' has already been consumed) and then its
    /// comment body or attributes.
    member this.ReadTag() : unit =
        let t = new StringBuilder()
        // Collects the name; returns the character that terminated it.
        let rec loop() =
            if this.ReadChar() = -1 then
                char 0
            else
                let ch = char current
                if ch = '>' || (ch = '/' && t.Length > 0) then
                    ch
                elif ch > ' ' then
                    ignore <| t.Append ch
                    // "!--" starts a comment even without a following blank.
                    if t.Length = 3 && t.ToString() = "!--" then ch else loop()
                elif t.Length > 0 then
                    ch
                else
                    // Skip blanks and control characters before the name.
                    loop()
        let mutable ch = loop()
        tag <- t.ToString().ToLower()
        if ch = '/' then
            // "<name/": queue the closing tag and consume the expected '>'.
            reserved <- "/" + tag
            ch <- char(this.ReadChar())
        if ch <> '>' then
            if tag = "!--" then
                this.ReadComment()
            else
                while this.ReadAttribute() do ()

    /// Reads a comment body up to the terminating "-->" and stores it (without
    /// the two trailing dashes) under the key "comment".
    member this.ReadComment() : unit =
        let cm = new StringBuilder()
        // m = number of consecutive '-' characters read immediately before.
        let rec loop m =
            if this.ReadChar() <> -1 then
                let ch = char current
                if ch = '>' && m >= 2 then
                    cm.Length <- cm.Length - 2
                else
                    ignore <| cm.Append ch
                    loop (if ch = '-' then m + 1 else 0)
        loop 0
        values.["comment"] <- cm.ToString()

    /// Reads one attribute of the current tag into the attribute table.
    /// Returns false when the tag (or the input) has ended.
    member this.ReadAttribute() : bool =
        let rec loop() =
            let nm = this.ReadValue(true)
            let ch = char current
            if ch = '/' then
                reserved <- "/" + tag
                // A value-less attribute written directly before '/'
                // (e.g. <a b/>) would otherwise be lost when the loop restarts.
                if nm <> null then values.[nm.ToLower()] <- ""
                loop()
            elif nm <> "" then ch, nm
            else loop()
        let ch, nm = loop()
        if nm = null then
            false
        else
            values.[nm.ToLower()] <- (if ch = '=' then this.ReadValue(false) else "")
            current <> int '>'

    /// Reads an attribute name (`isLeft` = true) or an attribute value
    /// (`isLeft` = false). A name ends at '=', '/', '>' or a blank; a value is
    /// either a double-quoted string or a run of non-blank characters ending
    /// at '>' or a blank. Returns null when nothing was read.
    member this.ReadValue(isLeft: bool) : string =
        let v = new StringBuilder()
        let rec loop() =
            if this.ReadChar() <> -1 then
                let ch = char current
                if ch = '>' || (isLeft && (ch = '=' || ch = '/')) then
                    ()
                elif ch = '"' then
                    // Quoted string: take everything up to the closing quote.
                    while this.ReadChar() <> -1 && current <> int '"' do
                        ignore <| v.Append (char current)
                elif ch > ' ' then
                    ignore <| v.Append ch
                    loop()
                elif v.Length = 0 then
                    // Leading blank: keep skipping. A blank after content ends it.
                    loop()
        loop()
        if v.Length = 0 then null else v.ToString()

    /// All remaining nodes. The same parser instance is yielded each time,
    /// positioned on the next node.
    member this.Nodes = seq {
        while this.Read() do yield this }

    /// Nodes up to (not including) the closing tag of the element that is
    /// current when enumeration starts.
    member this.Children = seq {
        let e = "/" + this.Tag
        while this.Read() && this.Tag <> e do yield this }

    /// Yields 0, 1, 2, ... for each child whose tag equals `t`, leaving the
    /// parser positioned on that child.
    /// NOTE(review): the indentation of this member was lost in the copy this
    /// was restored from; the counter is assumed to advance per match rather
    /// than per child -- confirm against the original.
    member this.SearchIteri t = seq {
        let n = ref 0
        for _ in this.Children do
            if this.Tag = t then
                yield !n
                n := !n + 1 }
/// Demo entry point: parses a small fixed document and prints the text and
/// tag of every node, one "text, tag" pair per line. Always returns 0.
[<EntryPoint>]
let test (args: string[]) =
    // `use` disposes the parser (and its reader) when the function exits.
    use xp = new XmlParser("<hoge>a<fuga>b<foo /><bar/></fuga>c</hoge>")
    while xp.Read() do
        printfn "%s, %s" xp.Text xp.Tag
    0
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment