Skip to content

Instantly share code, notes, and snippets.

@pulsation
Created September 20, 2013 13:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pulsation/6637314 to your computer and use it in GitHub Desktop.
Save pulsation/6637314 to your computer and use it in GitHub Desktop.
Use scala.xml._ to parse an Atom/RSS feed, selecting Dublin Core elements by their namespaces
package rss
import scala.xml._
/** A feed addressed by a URL, whose `item` elements can be downloaded and
  * turned into [[Item]] instances.
  *
  * @param url system identifier (here an HTTP URL) passed to `XML.load`
  */
class Feed(val url: String) {

  // Namespace URIs of the two extension modules read by buildItem.
  private val DublinCoreNs = "http://purl.org/dc/elements/1.1/"
  private val ContentModuleNs = "http://purl.org/rss/1.0/modules/content/"

  /** Loads the XML document at `url` and converts every `item` element found
    * anywhere in it into an [[Item]].
    *
    * Nothing is caught here: whatever `XML.load` throws propagates to the caller.
    *
    * @return the items in document order; empty when the document has no `item`
    */
  def downloadItems(): List[Item] = {
    val document = XML.load(url)
    (document \\ "item").map(buildItem).toList
  }

  /** Builds an [[Item]] from a single `item` element.
    *
    * Only direct children of `node` are inspected (`\` rather than `\\`), so
    * that a same-named element nested deeper inside the item cannot leak its
    * text into a field. A missing element yields an empty string.
    *
    * Note that the `guid` element is what ends up in the item's `link` field.
    *
    * @param node an `item` element
    * @return the item, with this feed as its parent
    */
  def buildItem(node: Node): Item = {
    new Item(
      this,
      (node \ "title").text,
      (node \ "guid").text,
      childText(node, "date", DublinCoreNs),
      childText(node, "encoded", ContentModuleNs))
  }

  /** Text of the direct children of `node` named `label` whose namespace URI
    * equals `namespace`; empty string when there is none.
    */
  private def childText(node: Node, label: String, namespace: String): String = {
    // The ascription keeps the result a NodeSeq so that `.text` is available.
    val matches: NodeSeq = (node \ label).filter(_.namespace == namespace)
    matches.text
  }
}
/** One entry of a [[Feed]].
  *
  * @param parent  the feed this item belongs to
  * @param title   item title
  * @param link    item link (Feed.buildItem fills this from the `guid` element)
  * @param pubDate publication date as raw text, unparsed
  * @param content item body as raw text
  */
class Item(
    val parent: Feed,
    val title: String,
    val link: String,
    val pubDate: String,
    val content: String) {

  /** One-line summary with title, link and date; `content` and `parent` are left out. */
  override def toString(): String =
    s"Title : $title Link: $link Date: $pubDate"
}
/** Command-line entry point: downloads a feed and prints one line per item. */
object Feed {

  // Feed fetched when no URL is given on the command line.
  private val DefaultUrl =
    "http://agriculture.gouv.fr/spip.php?page=backend&id_rubrique=460"

  /** Downloads the feed and prints each item using its `toString`.
    *
    * @param args optional; when present, `args(0)` is the feed URL to fetch
    *             instead of the built-in default
    */
  def main(args: Array[String]): Unit = {
    val feedUrl = args.headOption.getOrElse(DefaultUrl)
    val feed = new Feed(feedUrl)
    // downloadItems is declared with `()`, so it is called with `()` as well.
    feed.downloadItems().foreach(println)
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment