Skip to content

Instantly share code, notes, and snippets.

@mrico
Created October 16, 2010 15:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mrico/629892 to your computer and use it in GitHub Desktop.
Save mrico/629892 to your computer and use it in GitHub Desktop.
Basic script to import comments from an Apache Roller site into disqus.com.
#!/bin/sh
exec scala "$0" "$@"
!#
/*
Basic script to import comments from an Apache Roller site into disqus.com.
The script reads the entries and comments RSS feeds from the Roller website and generates a simple WXR file
that can be imported by disqus.com.
usage: roller_wxr_exporter.scala http://your_domain.com/your_blog
*/
import java.text.SimpleDateFormat
import scala.xml._
import java.net.URL
import java.io.FileWriter
// Script body: validate the command line, download the entries and comments
// feeds of the Roller blog, convert them and write the result to ./wxr.xml.

// `args` and `sys.exit` are used because the legacy `argv` alias and
// `Predef.exit` are not available on current Scala versions.
if (args.size != 1) {
  Console.err.println("usage: roller_wxr_exporter.scala http://your_domain.com/your_blog")
  sys.exit(1)
}

// Drop trailing slashes so the feed URLs built below never contain "//feed/...".
val uriBase = args(0).replaceAll("/+$", "")

// Running counter that toWxrComment uses to number the exported comments.
var commentId = 0

// RSS dates carry English day/month names ("Sat, 16 Oct 2010 ..."), so the parser
// must not depend on the JVM's default locale. The output format is pinned as well,
// because the default locale also selects the calendar system used for the year.
val dateFormatIn = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss Z", java.util.Locale.ENGLISH)
val dateFormatOut = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", java.util.Locale.ENGLISH)

val posts = XML.load(new URL(uriBase + "/feed/entries/rss"))
val comments = XML.load(new URL(uriBase + "/feed/comments/rss"))

// One WXR <item> per blog entry, each carrying the comments whose guid points at it.
val items = (posts \\ "item") flatMap (item => toItem(item, getComments(item).map(c => toWxrComment(c))))

// The XML declaration announces UTF-8, so the bytes must really be UTF-8;
// java.io.FileWriter would silently use the platform default charset instead.
val writer = new java.io.OutputStreamWriter(new java.io.FileOutputStream("wxr.xml"), "UTF-8")
try {
  XML.write(writer, assembleWxr(posts, items), "UTF-8", true, null)
} finally {
  // Release the file handle even when serialisation fails.
  writer.close()
}
/**
 * Selects the comment feed items that belong to the given blog entry.
 *
 * A comment belongs to the entry when its guid, with everything from the first
 * '#' onwards removed, equals the entry's guid. The entry guid is also printed
 * to standard output.
 *
 * @param item the entry's `<item>` node from the entries feed
 * @return the matching `<item>` nodes from the comments feed
 */
def getComments(item: Node): NodeSeq = {
  val postGuid = (item \ "guid").text
  println(postGuid)

  // Strips the "#..." fragment from the comment guid before comparing.
  def belongsToPost(comment: Node): Boolean = {
    val commentGuid = (comment \ "guid").text
    commentGuid.replaceAll("#.*$", "") == postGuid
  }

  val allComments = comments \\ "item"
  allComments.filter(belongsToPost)
}
/**
 * Builds the WXR `<item>` element for a single blog entry.
 *
 * @param item     the entry's `<item>` node; its `title`, `guid` and `pubDate` children are read
 * @param comments already converted comment nodes, embedded after the empty description
 * @return a new `<item>` element whose `<link>` carries the entry's guid
 */
def toItem(item: Node, comments: NodeSeq) = {
  def childText(label: String): String = (item \ label).text

  val entryTitle = childText("title")
  val entryGuid = childText("guid")
  val entryDate = childText("pubDate")

  // The markup is kept flush-left on purpose: whitespace inside an XML literal
  // is emitted verbatim into the generated document.
<item>
<title>{entryTitle}</title>
<link>{entryGuid}</link>
<pubDate>{entryDate}</pubDate>
<description/>
{comments}
</item>
}
/**
 * Converts one comment feed item into a `<wp:comment>` element.
 *
 * Each call increments the script-level `commentId` counter and uses the new
 * value as the comment id. The item's `pubDate` is parsed with `dateFormatIn`
 * and re-rendered with `dateFormatOut`. Author e-mail, URL and IP are emitted
 * as empty elements, and every comment is marked as approved.
 *
 * @param item a comment `<item>` node; its `pubDate`, `creator` and `description` children are read
 * @return the `<wp:comment>` element for the comment
 */
def toWxrComment(item: Node) = {
  commentId += 1
  val published = dateFormatIn.parse((item \ "pubDate").text)

  val author = (item \ "creator").text
  val wxrDate = dateFormatOut.format(published)
  val body = (item \ "description").text

  // The markup is kept flush-left on purpose: whitespace inside an XML literal
  // is emitted verbatim into the generated document.
<wp:comment>
<wp:comment_id>{commentId}</wp:comment_id>
<wp:comment_author>{author}</wp:comment_author>
<wp:comment_author_email />
<wp:comment_author_url />
<wp:comment_author_ip />
<wp:comment_date>{wxrDate}</wp:comment_date>
<wp:comment_content>{body}</wp:comment_content>
<wp:comment_approved>1</wp:comment_approved>
</wp:comment>
}
/**
 * Wraps the converted items in the enclosing WXR `<rss>`/`<channel>` document.
 *
 * Title, link, description and pubDate are copied from the `channel` element
 * of the source feed; generator and language are fixed values.
 *
 * @param rss   the source feed whose `channel` metadata is copied
 * @param items the already converted `<item>` nodes, placed at the end of the channel
 * @return the complete `<rss>` element, declaring the content, wfw, dc and wp namespaces
 */
def assembleWxr(rss: Elem, items: Seq[NodeSeq]) = {
  val sourceChannel = rss \\ "channel"
  def meta(label: String): String = (sourceChannel \ label).text

  val blogTitle = meta("title")
  val blogLink = meta("link")
  val blogDescription = meta("description")
  val blogDate = meta("pubDate")

  // The element content is kept flush-left on purpose: whitespace inside an
  // XML literal is emitted verbatim into the generated document.
<rss version="2.0"
     xmlns:content="http://purl.org/rss/1.0/modules/content/"
     xmlns:wfw="http://wellformedweb.org/CommentAPI/"
     xmlns:dc="http://purl.org/dc/elements/1.1/"
     xmlns:wp="http://wordpress.org/export/1.0/">
<channel>
<title>{blogTitle}</title>
<link>{blogLink}</link>
<description>{blogDescription}</description>
<pubDate>{blogDate}</pubDate>
<generator>roller_wxr_exporter</generator>
<language>en</language>
{items}
</channel>
</rss>
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment