Skip to content

Instantly share code, notes, and snippets.

@nbogie
Created March 23, 2011 12:00
Show Gist options
  • Save nbogie/883000 to your computer and use it in GitHub Desktop.
Save nbogie/883000 to your computer and use it in GitHub Desktop.
Corrected PodParser.hs example for HaXml 1.20.2 mostly following http://book.realworldhaskell.org/read/extended-example-web-client-programming.html#comment12828
{-- snippet all --}
module PodParser where
import PodTypes
import Text.XML.HaXml
import Text.XML.HaXml.Parse
import Text.XML.HaXml.Posn
import Text.XML.HaXml.Html.Generate(showattr)
import Data.Char
import Data.List
data PodItem = PodItem {itemtitle :: String,
enclosureurl :: String
}
deriving (Eq, Show, Read)
data Feed = Feed {channeltitle :: String,
items :: [PodItem]}
deriving (Eq, Show, Read)
{- | Given a podcast and an PodItem, produce an Episode -}
item2ep :: Podcast -> PodItem -> Episode
item2ep pc item =
Episode {epId = 0,
epCast = pc,
epURL = enclosureurl item,
epDone = False}
{- | Parse the data from a given string, with the given name to use
in error messages. -}
parse :: String -> String -> Feed
parse content name =
Feed {channeltitle = getTitle doc,
items = getEnclosures doc}
where parseResult = xmlParse name (stripUnicodeBOM content)
doc = getContent parseResult
getContent :: Document Posn -> Content Posn
getContent (Document _ _ e _) = CElem e noPos
{- | Some Unicode documents begin with a binary sequence;
strip it off before processing. -}
stripUnicodeBOM :: String -> String
stripUnicodeBOM ('\xef':'\xbb':'\xbf':x) = x
stripUnicodeBOM x = x
{- | Pull out the channel part of the document.
Note that HaXml defines CFilter as:
> type CFilter i = Content i -> [Content i]
-}
channel :: CFilter Posn
channel = tag "rss" /> tag "channel"
getTitle :: Content Posn -> String
getTitle doc =
contentToStringDefault "Untitled Podcast"
(channel /> tag "title" /> txt $ doc)
getEnclosures :: Content Posn -> [PodItem]
getEnclosures doc =
concatMap procPodItem $ getPodItems doc
where procPodItem :: Content Posn -> [PodItem]
procPodItem item = concatMap (procEnclosure title) enclosure
where title = contentToStringDefault "Untitled Episode"
(keep /> tag "title" /> txt $ item)
enclosure = (keep /> tag "enclosure") item
getPodItems :: CFilter Posn
getPodItems = channel /> tag "item"
procEnclosure :: String -> Content Posn -> [PodItem]
procEnclosure title enclosure =
map makePodItem (showattr "url" enclosure)
where makePodItem :: Content Posn -> PodItem
makePodItem x = PodItem {itemtitle = title,
enclosureurl = contentToString [x]}
{- | Convert [Content] to a printable String, with a default if the
passed-in [Content] is [], signifying a lack of a match. -}
contentToStringDefault :: String -> [Content Posn] -> String
contentToStringDefault msg [] = msg
contentToStringDefault _ x = contentToString x
{- | Convert [Content Posn] to a printable string, taking care to unescape it.
An implementation without unescaping would simply be:
> contentToString = concatMap (show . content)
Because HaXml's unescaping only works on Elements, we must make sure that
whatever Content we have is wrapped in an Element, then use txt to
pull the insides back out. -}
contentToString :: [Content Posn] -> String
contentToString =
concatMap procContent
where procContent x =
verbatim $ keep /> txt $ CElem (unesc (fakeElem x)) noPos
fakeElem :: Content Posn -> Element Posn
fakeElem x = Elem "fake" [] [x]
unesc :: Element Posn -> Element Posn
unesc = xmlUnEscape stdXmlEscaper
{-- /snippet all --}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment