Created
March 23, 2011 12:00
-
-
Save nbogie/883000 to your computer and use it in GitHub Desktop.
Corrected PodParser.hs example for HaXml 1.20.2 mostly following http://book.realworldhaskell.org/read/extended-example-web-client-programming.html#comment12828
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-- snippet all --} | |
module PodParser where | |
import PodTypes | |
import Text.XML.HaXml | |
import Text.XML.HaXml.Parse | |
import Text.XML.HaXml.Posn | |
import Text.XML.HaXml.Html.Generate(showattr) | |
import Data.Char | |
import Data.List | |
data PodItem = PodItem {itemtitle :: String, | |
enclosureurl :: String | |
} | |
deriving (Eq, Show, Read) | |
data Feed = Feed {channeltitle :: String, | |
items :: [PodItem]} | |
deriving (Eq, Show, Read) | |
{- | Given a podcast and an PodItem, produce an Episode -} | |
item2ep :: Podcast -> PodItem -> Episode | |
item2ep pc item = | |
Episode {epId = 0, | |
epCast = pc, | |
epURL = enclosureurl item, | |
epDone = False} | |
{- | Parse the data from a given string, with the given name to use | |
in error messages. -} | |
parse :: String -> String -> Feed | |
parse content name = | |
Feed {channeltitle = getTitle doc, | |
items = getEnclosures doc} | |
where parseResult = xmlParse name (stripUnicodeBOM content) | |
doc = getContent parseResult | |
getContent :: Document Posn -> Content Posn | |
getContent (Document _ _ e _) = CElem e noPos | |
{- | Some Unicode documents begin with a binary sequence; | |
strip it off before processing. -} | |
stripUnicodeBOM :: String -> String | |
stripUnicodeBOM ('\xef':'\xbb':'\xbf':x) = x | |
stripUnicodeBOM x = x | |
{- | Pull out the channel part of the document. | |
Note that HaXml defines CFilter as: | |
> type CFilter i = Content i -> [Content i] | |
-} | |
channel :: CFilter Posn | |
channel = tag "rss" /> tag "channel" | |
getTitle :: Content Posn -> String | |
getTitle doc = | |
contentToStringDefault "Untitled Podcast" | |
(channel /> tag "title" /> txt $ doc) | |
getEnclosures :: Content Posn -> [PodItem] | |
getEnclosures doc = | |
concatMap procPodItem $ getPodItems doc | |
where procPodItem :: Content Posn -> [PodItem] | |
procPodItem item = concatMap (procEnclosure title) enclosure | |
where title = contentToStringDefault "Untitled Episode" | |
(keep /> tag "title" /> txt $ item) | |
enclosure = (keep /> tag "enclosure") item | |
getPodItems :: CFilter Posn | |
getPodItems = channel /> tag "item" | |
procEnclosure :: String -> Content Posn -> [PodItem] | |
procEnclosure title enclosure = | |
map makePodItem (showattr "url" enclosure) | |
where makePodItem :: Content Posn -> PodItem | |
makePodItem x = PodItem {itemtitle = title, | |
enclosureurl = contentToString [x]} | |
{- | Convert [Content] to a printable String, with a default if the | |
passed-in [Content] is [], signifying a lack of a match. -} | |
contentToStringDefault :: String -> [Content Posn] -> String | |
contentToStringDefault msg [] = msg | |
contentToStringDefault _ x = contentToString x | |
{- | Convert [Content Posn] to a printable string, taking care to unescape it. | |
An implementation without unescaping would simply be: | |
> contentToString = concatMap (show . content) | |
Because HaXml's unescaping only works on Elements, we must make sure that | |
whatever Content we have is wrapped in an Element, then use txt to | |
pull the insides back out. -} | |
contentToString :: [Content Posn] -> String | |
contentToString = | |
concatMap procContent | |
where procContent x = | |
verbatim $ keep /> txt $ CElem (unesc (fakeElem x)) noPos | |
fakeElem :: Content Posn -> Element Posn | |
fakeElem x = Elem "fake" [] [x] | |
unesc :: Element Posn -> Element Posn | |
unesc = xmlUnEscape stdXmlEscaper | |
{-- /snippet all --} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment