Skip to content

Instantly share code, notes, and snippets.

@junaidk
Last active August 29, 2015 14:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save junaidk/3af2fef646c0c6663f1d to your computer and use it in GitHub Desktop.
Save junaidk/3af2fef646c0c6663f1d to your computer and use it in GitHub Desktop.
WallBase Scraping
# Download every URL listed (one per line) in ../satish.txt into the
# images/ directory, naming each file after a segment of its URL.
import contextlib

try:
    import urllib2  # Python 2
except ImportError:
    # Python 3 moved urlopen into urllib.request; alias it so the rest of
    # the script is identical on both interpreters.
    import urllib.request as urllib2

with open("../satish.txt", "r") as fs:
    for line in fs:
        # Strip the line terminator (and stray whitespace) so it is sent
        # neither to the server nor into the file name.
        url = line.strip()
        if not url:
            # Blank lines (e.g. a trailing newline at EOF) carry no URL.
            continue
        # The saved name is the 5th "/"-separated piece of the URL, i.e. for
        # "http://host/<dir>/<file>" it is "<file>".
        fileName = url.split("/")[4]
        # closing()/with guarantee both the HTTP response and the output file
        # are released even if the transfer fails midway.
        with contextlib.closing(urllib2.urlopen(url)) as wfile:
            with open("images/" + fileName, 'wb') as output:
                output.write(wfile.read())
        print(fileName + ' downloaded')
// Uses scraperjs ("https://github.com/junaidk/scraperjs") on Node.js.
// Wallbase paginated URLs, where ## is a numeric index in the path:
//   http://wallbase.cc/toplist/index/##
//   31
//   http://wallbase.cc/search/index/##?tag=8023
// ## grows by 32 for each successive page,
// e.g. 32, 64, 96, ...
// Walks the first wallbase search-result pages for tag 8023, follows every
// absolute link found inside ".wrapper", and appends the image URLs found
// inside ".content" on each followed page to Img-links.txt, one URL per line.
var scraperjs = require('scraperjs');
var fs = require('fs');

// The index in the search URL advances by this amount for each page.
var PAGE_STEP = 32;
// Number of result pages to visit (pages 1..PAGE_COUNT).
var PAGE_COUNT = 3;
// File that receives the collected image URLs.
var OUTPUT_FILE = "Img-links.txt";

/**
 * Collects the href of every ".wrapper a" anchor that contains "http".
 * @param {Function} $ - selector function handed to the scrape callback.
 * @returns {string[]} matching href values.
 */
function collectAbsoluteLinks($) {
    return $(".wrapper a").map(function () {
        var href = $(this).attr('href');
        // Anchors without an href yield undefined; skip them instead of
        // throwing on .indexOf.
        if (typeof href !== 'string') {
            return undefined;
        }
        // Returning undefined drops the element from the mapped result.
        return href.indexOf("http") > -1 ? href : undefined;
    }).get();
}

/**
 * Collects the src attribute of every ".content img" element.
 * @param {Function} $ - selector function handed to the scrape callback.
 * @returns {string[]} src values.
 */
function collectImageSources($) {
    return $(".content img").map(function () {
        return $(this).attr('src');
    }).get();
}

/**
 * Logs the scraped image URLs and appends them to OUTPUT_FILE.
 * @param {string[]} imgSrc - image URLs scraped from one page.
 */
function saveImageSources(imgSrc) {
    console.log(imgSrc);
    // [].concat tolerates a single string as well as an array.
    var sources = [].concat(imgSrc || []);
    if (sources.length === 0) {
        // Nothing scraped: avoid appending an empty line.
        return;
    }
    // Join explicitly so several URLs end up on separate lines rather than
    // comma-separated on one line (the default array-to-string conversion).
    fs.appendFile(OUTPUT_FILE, sources.join("\n") + "\n", function (err) {
        // A callback is mandatory on current Node versions; it is also the
        // only place a failed write can be reported.
        if (err) {
            console.error("could not append to " + OUTPUT_FILE + ": " + err.message);
        }
    });
}

/**
 * Scrapes one linked page and stores the image URLs it contains.
 * @param {string} pageUrl - absolute URL taken from a search-result page.
 */
function scrapeWallpaperPage(pageUrl) {
    scraperjs.StaticScraper.create(pageUrl)
        .scrape(collectImageSources, saveImageSources);
}

/**
 * Scrapes one search-result page and follows every absolute link on it.
 * @param {number} pageIndex - numeric index placed in the search URL.
 */
function scrapeSearchPage(pageIndex) {
    scraperjs.StaticScraper.create("http://wallbase.cc/search/index/" + pageIndex + '?tag=8023')
        .scrape(collectAbsoluteLinks, function (links) {
            links.forEach(function (link) {
                scrapeWallpaperPage(link);
            });
        });
}

// `page` is declared with var so the counter does not leak as an implicit global.
for (var page = 1; page <= PAGE_COUNT; page++) {
    console.log("page# : " + page);
    scrapeSearchPage(PAGE_STEP * page);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment