Skip to content

Instantly share code, notes, and snippets.

@techgaun
Created July 1, 2015 21:52
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save techgaun/cf37344a758f377a313f to your computer and use it in GitHub Desktop.
Save techgaun/cf37344a758f377a313f to your computer and use it in GitHub Desktop.
//required node modules
//untested
var request = require('request');
var cheerio = require('cheerio');
var fs = require('fs');
var s3 = require('s3');
var AWS = require('aws-sdk');
// Page whose <img> tags are crawled.
var url = 'https://medium.com/@benjaminhardy/8-things-every-person-should-do-before-8-a-m-cc0233e15c8d'; //url of site
// Load the AWS SDK configuration from a local JSON file.
AWS.config.loadFromPath('./config.json');
// Every call made through this client targets the 'crawlerdemo' bucket by default.
var s3bucket = new AWS.S3({params: {Bucket: 'crawlerdemo'}});

/*
 * Crawl: fetch the page, and for every <img> it contains download the image
 * to the current working directory (named after the last segment of its URL
 * path) and then upload that file to the S3 bucket under the same name.
 *
 * A failure on one image is logged and does not stop the remaining images.
 * `download` is assigned further down the file; that is fine because this
 * callback only runs after the whole script has been evaluated.
 */
request(url, function(err, resp, body){
  if (err) {
    // Without a body there is nothing to parse.
    console.log("Error: ", err);
    return;
  }
  // Keep the cheerio handle local instead of leaking it as a global.
  var $ = cheerio.load(body);
  $('img').each(function(i, link){ //go through every image on the page
    // Declared per iteration: the async callbacks below must each see their
    // own image, not whatever the last iteration left behind.
    var image_path = $(link).attr('src');
    if (!image_path) {
      return; // <img> without a src attribute: nothing to download
    }
    var image_url;
    try {
      // Resolve relative and protocol-relative src values against the page URL.
      image_url = new URL(image_path, url);
    } catch (e) {
      console.log("Error: ", e);
      return;
    }
    if (image_url.protocol !== 'http:' && image_url.protocol !== 'https:') {
      return; // e.g. inline data: URIs cannot be fetched over HTTP
    }
    // File name = last path segment; using the path (not the raw src) keeps
    // any query string out of the local file name and the S3 key.
    var pathname = image_url.pathname;
    var filename = pathname.substring(pathname.lastIndexOf('/') + 1);
    if (!filename) {
      return; // URL path ends in '/', so there is no usable name
    }
    // Download the image itself (not the page it was found on).
    download(image_url.href, filename, function(err){
      if (err) {
        console.log("Error: ", err);
        return;
      }
      fs.readFile(filename, function(err, data) {
        if (err) {
          console.log("Error: ", err);
          return;
        }
        // readFile without an encoding already yields a Buffer, which
        // upload() accepts directly as the object body.
        var params = {Key: filename, Body: data};
        s3bucket.upload(params, function(err) {
          if (err) {
            console.log("Error: ", err);
          }
        });
      });
    });
  });
});
/**
 * Download a resource over HTTP(S) and save it to a local file.
 *
 * @param {string} uri - Address of the resource to fetch.
 * @param {string} filename - Path of the local file to write.
 * @param {function(?Error)} callback - Called exactly once: with no error
 *   after the file has been written and closed, or with an Error when the
 *   request fails, the server answers with a non-2xx status, or the file
 *   cannot be written. Callers that ignore the argument keep working.
 */
var download = function(uri, filename, callback){
  var failure = null;
  var finished = false;

  // The error and close paths can both fire; report only the first outcome.
  function finish(err) {
    if (finished) {
      return;
    }
    finished = true;
    callback(err || failure);
  }

  var file = fs.createWriteStream(filename);
  var req = request(uri);

  req.on('response', function(res){
    // Remember a bad status so the error page is not mistaken for the resource.
    if (res.statusCode < 200 || res.statusCode >= 300) {
      failure = new Error('GET ' + uri + ' returned HTTP ' + res.statusCode);
    }
  });
  req.on('error', function(err){
    // pipe() does not end the destination when the source fails.
    file.end();
    finish(err);
  });
  file.on('error', function(err){
    // Nothing can be written any more, so stop the transfer.
    req.abort();
    finish(err);
  });
  file.on('close', function(){
    finish();
  });

  req.pipe(file);
};
@meetzaveri
Copy link

@techgaun, this worked for me after 3 hours of scraping the internet. Thanks for the gist!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment