Skip to content

Instantly share code, notes, and snippets.

@yuroyoro
Created October 26, 2009 10:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save yuroyoro/218551 to your computer and use it in GitHub Desktop.
Save yuroyoro/218551 to your computer and use it in GitHub Desktop.
import java.net.URL
import java.awt.image.BufferedImage
import javax.imageio.ImageIO
import java.io._
import scala.xml._
import scala.io.Source
/**
 * Command-line crawler that downloads the photos of a Tumblr blog.
 *
 * Usage: `TumblrCrawler <blog-name>`
 *
 * The blog's `/api/read` endpoint is paged through 50 posts at a time. For
 * every post the `photo-url` with the largest `max-width` is downloaded and
 * written into the current working directory under the last path segment of
 * its URL. Crawling stops at the first page that contains no posts.
 */
object TumblrCrawler {

  /** Number of posts requested per API call (also the paging step). */
  private val PageSize = 50

  /** Pause after each download attempt, in milliseconds. */
  private val DownloadDelayMillis = 1000L

  /** Captures the last path segment of an http(s) URL as the file name. */
  private val FileNameR = """https?://.+/([^/]+)$""".r

  /** Captures everything after the last dot of a file name as its extension. */
  private val ExtensionR = """.+\.(.+)$""".r

  /**
   * Entry point.
   *
   * @param args the first element is the Tumblr blog name (the sub-domain of
   *             `tumblr.com`); when it is missing a usage message is printed
   *             to stderr and nothing is crawled
   */
  def main(args: Array[String]): Unit = {
    args.headOption match {
      case Some(blog) =>
        crawl("http://%s.tumblr.com/api/read".format(blog), 0)
      case None =>
        System.err.println("usage: TumblrCrawler <tumblr-blog-name>")
    }
  }

  /**
   * Downloads the photos of one page of posts, then recurses to the next page.
   *
   * @param apiUrl base URL of the blog's read API, without query string
   * @param start  zero-based offset of the first post to request
   */
  @scala.annotation.tailrec
  private def crawl(apiUrl: String, start: Int): Unit = {
    val url = apiUrl + "?type=photo&start=%d&num=%d".format(start, PageSize)
    println(url)
    val posts = fetchXml(url) \\ "post"
    // Stop once a page comes back without posts; otherwise write the images
    // to files and recurse to the next page.
    if (posts.nonEmpty) {
      posts.foreach { post =>
        largestPhotoUrl(post).foreach(saveImage)
      }
      crawl(apiUrl, start + PageSize)
    }
  }

  /**
   * Loads and parses the XML document at `url`.
   *
   * The stream is handed to the XML parser directly (instead of being decoded
   * to a string first) so the document's own encoding is honoured, and it is
   * always closed afterwards.
   */
  private def fetchXml(url: String): Node = {
    val in = new URL(url).openStream()
    try XML.load(in) finally in.close()
  }

  /**
   * Picks the URL with the highest resolution among the direct `photo-url`
   * children of `post`.
   *
   * @return the text of the `photo-url` with the largest `max-width`
   *         attribute (the later one wins on ties), or `None` when the post
   *         has no `photo-url` child
   */
  private def largestPhotoUrl(post: Node): Option[String] = {
    // A missing or non-numeric max-width ranks below every real width.
    def maxWidth(photoUrl: Node): Int =
      try (photoUrl \ "@max-width").text.trim.toInt
      catch { case _: NumberFormatException => -1 }

    (post \ "photo-url")
      .reduceLeftOption((a, b) => if (maxWidth(a) > maxWidth(b)) a else b)
      .map(_.text.trim)
  }

  /**
   * Downloads the image at `url` and writes it to the current directory.
   *
   * The target file is named after the last path segment of the URL and its
   * extension selects the output format; a name without extension is saved
   * as PNG with `.png` appended. Failures are reported on stderr and skipped
   * so that a single bad image does not abort the crawl.
   */
  private def saveImage(url: String): Unit = {
    url match {
      case FileNameR(fname) =>
        val (format, target) = fname match {
          case ExtensionR(ext) => (ext, new File(fname))
          case _               => ("png", new File(fname + ".png"))
        }
        try {
          // ImageIO.read yields null when no registered reader understands the data.
          Option(ImageIO.read(new URL(url))) match {
            case None =>
              System.err.println("Skipped (unsupported image data): " + url)
            case Some(image) =>
              // ImageIO.write yields false when no writer exists for the format.
              if (ImageIO.write(image, format, target)) println("Download:" + fname)
              else System.err.println("Skipped (cannot write format '" + format + "'): " + url)
          }
        } catch {
          case e: IOException =>
            System.err.println("Failed: " + url + " (" + e.getMessage + ")")
        }
        // Throttle requests to stay polite towards the image host.
        Thread.sleep(DownloadDelayMillis)
      case _ =>
        System.err.println("Skipped (cannot derive a file name): " + url)
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment