Skip to content

Instantly share code, notes, and snippets.

@harrah
Created October 10, 2012 15:42
Show Gist options
  • Save harrah/3866442 to your computer and use it in GitHub Desktop.
Save harrah/3866442 to your computer and use it in GitHub Desktop.
Archive the Google Code wiki to HTML, and replace the old wiki pages with links to the archive.
import org.jsoup._
import java.io.{File, FileWriter}
/*
libraryDependencies += "org.jsoup" % "jsoup" % "1.7.1"
scalaVersion := "2.10.0-M7"
*/
/**
 * Archives Google Code wiki pages as static HTML and replaces each local wiki
 * source file with a short notice linking to the archived copy.
 *
 * Usage: `Main <wiki-input-dir> <html-output-dir>`
 *
 * For every `*.wiki` file in the input directory the rendered page is fetched
 * from code.google.com, cleaned up, and written to `<html-output-dir>/<name>.html`;
 * the `.wiki` file itself is then overwritten with the "sbt has moved" notice.
 */
object Main {
  /** File-name suffix identifying a wiki source page. */
  val WikiSuffix = ".wiki"
  /** File-name suffix used for the archived pages. */
  val HtmlSuffix = ".html"
  val Href = "href"
  /** CSS selector for the author element (an id selector, despite the name). */
  val WikiAuthorClass = "#wikiauthor"
  /** Prefix stripped from internal wiki links so they become relative links. */
  val WikiLinkPrefix = "/p/simple-build-tool/wiki/"
  /** Anchor name of the "sbt has moved" heading that is removed from archived pages. */
  val SbtMoved = "sbt_has_moved"
  val RobotsMeta = """<meta name="robots" content="noindex, nofollow"></meta>"""

  /**
   * Entry point.
   *
   * @param args `args(0)` is the directory containing the `.wiki` files,
   *             `args(1)` is the directory that receives the generated HTML.
   */
  def main(args: Array[String]): Unit = {
    if (args.length < 2)
      sys.error("Usage: Main <wiki-input-dir> <html-output-dir>")
    val inDir = new File(args(0))
    val outDir = new File(args(1))
    // listFiles() returns null (not an empty array) when inDir is not a readable directory.
    val pages = Option(inDir.listFiles()).getOrElse(
      sys.error("Not a readable directory: " + inDir.getAbsolutePath))
    // Process every wiki page; non-wiki entries are skipped by wikiPageName.
    for (page <- pages; name <- wikiPageName(page)) {
      // Archive first, so the wiki source is only replaced once the HTML has been written.
      write(generateHtml(name), new File(outDir, name + HtmlSuffix), name)
      write(newWikiContent(name), page, name)
    }
  }

  /**
   * Writes `content` to `file` as UTF-8, creating parent directories as needed.
   *
   * @param content text to write
   * @param file    destination; an existing file is overwritten
   * @param name    page name, used only for the progress message
   */
  def write(content: String, file: File, name: String): Unit = {
    import java.io.{FileOutputStream, OutputStreamWriter}
    println("Writing " + file.getAbsolutePath + "...")
    // getParentFile is null for a bare relative file name.
    Option(file.getParentFile).foreach(_.mkdirs())
    // Explicit charset: FileWriter would use the platform default encoding.
    val out = new OutputStreamWriter(new FileOutputStream(file), "UTF-8")
    try { out.write(content) }
    finally { out.close() }
    println("Transformed " + name)
  }

  /**
   * Downloads the rendered wiki page `pagename` and returns the cleaned-up HTML:
   * author/featured-label/"sbt has moved" elements removed, internal wiki links
   * made relative, and a robots `noindex` meta tag added.
   */
  def generateHtml(pagename: String): String = {
    val url = wikiURL(pagename)
    println("Retrieving " + url)
    val doc = Jsoup.connect(url).get()
    trim(doc)
    transformInternalLinks(doc)
    addRobotsMeta(doc)
    doc.outerHtml()
  }

  /** Appends the robots meta tag to the document's `head`. */
  def addRobotsMeta(doc: nodes.Document): Unit = {
    doc.select("head").append(RobotsMeta)
  }

  /** Removes the author element, "Featured" label links and the "sbt has moved" section. */
  def trim(doc: nodes.Document): Unit = {
    doc.select(WikiAuthorClass).remove()
    doc.select(s"a.label[$Href*=label:Featured]").remove()
    removeSbtMoved(doc)
  }

  /**
   * Removes each `h1` that contains the "sbt has moved" anchor together with up to
   * two element siblings following it, and removes the parent element of every
   * link pointing at that anchor.
   */
  def removeSbtMoved(doc: nodes.Document): Unit = {
    for (moved <- select(doc, s"h1 > a[name=$SbtMoved]")) {
      val h1 = moved.parent
      // nextElementSibling returns null when there is no following element.
      val next = Option(h1.nextElementSibling)
      val afterNext = next.flatMap(n => Option(n.nextElementSibling))
      afterNext.foreach(_.remove())
      next.foreach(_.remove())
      h1.remove()
    }
    for (a <- select(doc, s"a[$Href=#$SbtMoved]"))
      a.parent.remove()
  }

  /** Rewrites links starting with [[WikiLinkPrefix]] so that the prefix is dropped. */
  def transformInternalLinks(doc: nodes.Document): Unit = {
    for (a <- select(doc, s"a[$Href^=$WikiLinkPrefix]"))
      a.attr(Href, a.attr(Href).stripPrefix(WikiLinkPrefix))
  }

  /** Runs the CSS selector `s` against `doc` and exposes the matches as a Scala iterator. */
  def select(doc: nodes.Document, s: String) = {
    import collection.JavaConverters._
    doc.select(s).iterator.asScala
  }

  /** Returns the page name (file name without `.wiki`) if `file` is a wiki page, else `None`. */
  def wikiPageName(file: File): Option[String] = {
    val fileName = file.getName
    if (fileName.endsWith(WikiSuffix)) Some(fileName.stripSuffix(WikiSuffix))
    else None
  }

  /** URL of the content-only rendering of wiki page `name`. */
  def wikiURL(name: String) = s"http://code.google.com/p/simple-build-tool/wiki/$name?show=content"

  /** Wiki markup that replaces the original page, pointing readers at the archive. */
  def newWikiContent(name: String): String = s"""
= sbt has moved =
*sbt is now hosted on !GitHub*: https://github.com/harrah/xsbt.
This page previously documented sbt 0.7.7, which is *no longer maintained*.
The original content is archived at http://www.scala-sbt.org/0.7.7/docs/$name.html
"""
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment