Created
May 23, 2011 13:47
-
-
Save wspringer/986713 to your computer and use it in GitHub Desktop.
Just in case we ever need absolute URLs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Drop-in replacement for the original `ExtensibleReadability` class, allowing us to add some of
 * our own processing (currently: turning relative image URLs into absolute ones).
 *
 * @param base the base URI that relative image references are resolved against. It is parsed
 *             eagerly with `URI.create`, so an invalid value makes construction fail with an
 *             `IllegalArgumentException`.
 */
class UltimateReadability(base: String) extends ExtensibleReadability with Logging {

  /** Parsed form of `base`; every relative image reference is resolved against it. */
  val baseURI: URI = URI.create(base)

  /** Turns the `IllegalArgumentException` thrown by `URI.create` on malformed input into `None`. */
  val uriConversion = catching(classOf[IllegalArgumentException])

  /**
   * Cleanup step overridden from `ExtensibleReadability`; here it only rewrites image URIs.
   * NOTE(review): the parent implementation is not invoked -- confirm that is intended.
   *
   * @param document the document to fix up in place
   */
  override def cleanup(document: JSoupDocument): Unit = {
    fixImageURIs(document)
  }

  /**
   * Rewrites the `src` attribute of every `img` element from a relative reference into an
   * absolute one, resolved against the base URI passed to the constructor.
   *
   * Elements are left untouched when their `src` is missing or blank, cannot be parsed as a URI,
   * or is already absolute.
   *
   * @param doc the document whose image elements are modified in place
   */
  def fixImageURIs(doc: JSoupDocument): Unit = {
    for {
      elem <- doc.select("img")
      // jsoup returns "" (not null) for a missing attribute; Option(...) only guards against a
      // null coming from some other implementation.
      src <- Option(elem.attr("src")).map(_.trim)
      // An empty reference would resolve to the base URI itself, which would point the image at
      // the page rather than at a picture, so skip it.
      if !src.isEmpty
      uri <- uriConversion.opt(URI.create(src))
      if !uri.isAbsolute
    } {
      debug("Fixing %s".format(uri))
      val replacement = baseURI.resolve(uri)
      elem.attr("src", replacement.toASCIIString)
    }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment