Skip to content

Instantly share code, notes, and snippets.

@wspringer
Created May 23, 2011 13:47
Show Gist options
  • Save wspringer/986713 to your computer and use it in GitHub Desktop.
Save wspringer/986713 to your computer and use it in GitHub Desktop.
Just in case we ever need absolute URLs
/**
 * Drop-in replacement for the original [[ExtensibleReadability]] class, allowing us to add some of
 * our own processing (like turning relative image URLs into absolute ones).
 *
 * @param base the base URI that relative image references are resolved against. It is parsed with
 *             `URI.create`, so constructing this class with a string that is not a valid URI
 *             throws an `IllegalArgumentException`.
 */
class UltimateReadability(base: String) extends ExtensibleReadability with Logging {

  /** The parsed form of `base`; relative `src` values are resolved against it. */
  val baseURI: URI = URI.create(base)

  /**
   * Exception handler for URI parsing: used through `opt`, it turns the `IllegalArgumentException`
   * raised by `URI.create` on malformed input into `None` instead of letting it propagate.
   */
  val uriConversion = catching(classOf[IllegalArgumentException])

  /**
   * Cleanup hook: rewrites the image URIs of the given document in place.
   *
   * @param document the document to process
   */
  override def cleanup(document: JSoupDocument): Unit = {
    fixImageURIs(document)
  }

  /**
   * Fixes the `src` attribute of every `img` element by turning relative references into absolute
   * references, resolved against the base URI passed to the constructor.
   *
   * Images are left untouched when their `src` is missing or empty, cannot be parsed as a URI, or
   * is already absolute.
   *
   * @param doc the document whose `img` elements are updated in place
   */
  def fixImageURIs(doc: JSoupDocument): Unit = {
    for {
      elem <- doc.select("img")
      src <- Option(elem.attr("src"))
      // jsoup reports a missing attribute as an empty string rather than null. An empty string is
      // a valid relative URI, so without this guard the image would receive a `src` derived purely
      // from the base URI instead of staying untouched.
      if !src.isEmpty
      uri <- uriConversion.opt(URI.create(src))
      if !uri.isAbsolute
    } {
      debug("Fixing %s".format(uri))
      val replacement = baseURI.resolve(uri)
      elem.attr("src", replacement.toASCIIString)
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment