Created
October 16, 2010 15:13
-
-
Save mrico/629892 to your computer and use it in GitHub Desktop.
Basic script to import comments from an Apache Roller site to disqus.com.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh | |
exec scala "$0" "$@" | |
!# | |
/*
Basic script to import comments from an Apache Roller site to disqus.com.
The script reads the entries and comments RSS feeds from the Roller website and generates a simple WXR file
that can be imported by disqus.com.
usage: roller_wxr_exporter.scala http://your_domain.com/your_blog
*/
import java.text.SimpleDateFormat | |
import scala.xml._ | |
import java.net.URL | |
import java.io.FileWriter | |
// Script entry point: validates the single command-line argument (the blog base URL),
// downloads the entries and comments RSS feeds, converts them and writes the result to wxr.xml.
if (argv.size != 1) {
  println("usage: roller_wxr_exporter.scala http://your_domain.com/your_blog")
  // java.lang.System.exit is used instead of the bare Predef `exit`, which was
  // deprecated in Scala 2.9 and is no longer available in later releases.
  System.exit(1)
}

// Base URL of the Roller blog; the feed paths below are appended to it.
val uriBase = argv(0)

// Running counter used by toWxrComment to hand out comment ids.
var commentId = 0

// The input pattern contains English day and month names, so the formatters are pinned
// to Locale.US; otherwise parsing fails on machines whose default locale is not English.
val dateFormatIn = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss Z", java.util.Locale.US)
val dateFormatOut = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", java.util.Locale.US)

val posts = XML.load(new URL(uriBase + "/feed/entries/rss"))
val comments = XML.load(new URL(uriBase + "/feed/comments/rss"))

// One WXR <item> per feed entry, each carrying its converted comments.
val items = (posts \\ "item") flatMap(item => toItem(item, getComments(item).map(c => toWxrComment(c))))

// The XML declaration written below announces UTF-8, so the bytes must really be UTF-8.
// FileWriter would silently use the platform default charset instead.
val writer = new java.io.OutputStreamWriter(new java.io.FileOutputStream("wxr.xml"), "UTF-8")
try {
  XML.write(writer, assembleWxr(posts, items), "UTF-8", true, null)
} finally {
  // Always release the file handle, even when serialization fails.
  writer.close()
}
/**
 * Collects the comment feed entries that belong to the given blog entry.
 *
 * The entry's guid is printed to stdout. A comment is selected when its own guid,
 * with any trailing "#..." part stripped, equals the entry's guid.
 *
 * @param item an <item> node from the entries feed
 * @return the matching <item> nodes from the comments feed
 */
def getComments(item: Node): NodeSeq = {
  val entryGuid = (item \ "guid").text
  println(entryGuid)

  // Guid of the entry a comment points at: the comment guid without its "#..." suffix.
  def owningEntry(comment: Node): String =
    (comment \ "guid").text.replaceAll("#.*$", "")

  val allComments = comments \\ "item"
  allComments.filter(comment => owningEntry(comment) == entryGuid)
}
/**
 * Builds the output <item> element for one blog entry.
 *
 * Title and publication date are copied from the feed entry, the entry's guid is
 * used as the link, the description is left empty and the supplied comment nodes
 * are appended as children.
 *
 * @param item     an <item> node from the entries feed
 * @param comments already converted comment nodes to embed in the item
 */
def toItem(item: Node, comments: NodeSeq) = {
  val entryTitle = (item \ "title").text
  val entryLink = (item \ "guid").text
  val entryDate = (item \ "pubDate").text

  <item>
    <title>{entryTitle}</title>
    <link>{entryLink}</link>
    <pubDate>{entryDate}</pubDate>
    <description/>
    {comments}
  </item>
}
/**
 * Converts one entry of the comments feed into a <wp:comment> element.
 *
 * Each call increments the script-wide comment counter and uses the new value as
 * the comment id. The author comes from the "creator" child, the content from
 * "description", and "pubDate" is parsed with dateFormatIn and re-rendered with
 * dateFormatOut. Email, URL and IP are emitted empty and the comment is marked approved.
 *
 * @param item an <item> node from the comments feed
 */
def toWxrComment(item: Node) = {
  // The counter is advanced before the date is parsed, as in the original flow.
  commentId += 1
  val published = dateFormatIn.parse((item \ "pubDate").text)

  val id = commentId
  val author = (item \ "creator").text
  val publishedText = dateFormatOut.format(published)
  val body = (item \ "description").text

  <wp:comment>
    <wp:comment_id>{id}</wp:comment_id>
    <wp:comment_author>{author}</wp:comment_author>
    <wp:comment_author_email />
    <wp:comment_author_url />
    <wp:comment_author_ip />
    <wp:comment_date>{publishedText}</wp:comment_date>
    <wp:comment_content>{body}</wp:comment_content>
    <wp:comment_approved>1</wp:comment_approved>
  </wp:comment>
}
/**
 * Wraps the converted items in the final <rss> document.
 *
 * Title, link, description and publication date are copied from the source feed's
 * channel; generator and language are fixed values. The root element declares the
 * content, wfw, dc and wp namespaces.
 *
 * @param rss   the parsed entries feed whose channel metadata is reused
 * @param items the converted <item> elements to place inside the channel
 */
def assembleWxr(rss: Elem, items: Seq[NodeSeq]) = {
  val sourceChannel = rss \\ "channel"

  // Text of a direct child of the source channel.
  def channelText(label: String): String = (sourceChannel \ label).text

  val feedTitle = channelText("title")
  val feedLink = channelText("link")
  val feedDescription = channelText("description")
  val feedDate = channelText("pubDate")

  <rss version="2.0"
       xmlns:content="http://purl.org/rss/1.0/modules/content/"
       xmlns:wfw="http://wellformedweb.org/CommentAPI/"
       xmlns:dc="http://purl.org/dc/elements/1.1/"
       xmlns:wp="http://wordpress.org/export/1.0/">
    <channel>
      <title>{feedTitle}</title>
      <link>{feedLink}</link>
      <description>{feedDescription}</description>
      <pubDate>{feedDate}</pubDate>
      <generator>roller_wxr_exporter</generator>
      <language>en</language>
      {items}
    </channel>
  </rss>
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment