Skip to content

Instantly share code, notes, and snippets.

@fizerkhan
Created May 23, 2013 08:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save fizerkhan/5633503 to your computer and use it in GitHub Desktop.
Save fizerkhan/5633503 to your computer and use it in GitHub Desktop.
Extract the largest image thumbnail from a URL using Node.js and cheerio
var cheerio = require('cheerio')
, Shred = require('shred')
, shred = new Shred()
, http = require('http')
, URL = require('url');
/**
 * HTTP front end for image discovery.
 *
 * Every request is expected to carry the page to inspect in its `url` query
 * parameter (e.g. `/?url=http%3A%2F%2Fexample.com`). The handler replies with:
 *   - 200 and the discovered image URL as a JSON string when one is found,
 *   - 404 when discovery completes without finding an image,
 *   - 400 when the `url` parameter is missing, empty or repeated.
 */
var server = http.createServer(function(request, response) {
  // `URL` is the Node `url` module; `true` asks it to parse the query string.
  var urlToDiscover = URL.parse(request.url, true).query['url'];

  // Without this guard an undefined value (or an array, for a repeated
  // parameter) would be handed to startDiscovery as the URL to fetch.
  if (typeof urlToDiscover !== 'string' || urlToDiscover === '') {
    response.writeHead(400, "Missing url parameter");
    return response.end();
  }

  return startDiscovery(urlToDiscover, function(theImageURL) {
    if (theImageURL != null) {
      response.writeHead(200, {
        "Content-Type": 'application/json'
      });
      return response.end(JSON.stringify(theImageURL));
    } else {
      console.log("Did not find image for " + urlToDiscover);
      response.writeHead(404, "Cant find theImage");
      return response.end();
    }
  });
});
// Start accepting connections on the '0.0.0.0' address, using the port from
// the PORT environment variable when it is set and 5000 otherwise.
server.listen(process.env.PORT || 5000, '0.0.0.0');
/**
 * Fetches `url` with Shred and reports the best image found for it.
 *
 * @param {string} url - Page to fetch.
 * @param {function(?string)} cb - Called at most once with the discovered
 *   image URL, or with null when nothing was found or the fetch did not yield
 *   a usable response.
 * @returns {*} Whatever `shred.get` returns.
 */
var startDiscovery = function(url, cb) {
  var finished = false;

  // Guarantees a single callback even if more than one handler below fires
  // for the same request; a second call would write to an ended response.
  var finish = function(imageURL) {
    if (finished) {
      return;
    }
    finished = true;
    return cb(imageURL);
  };

  var giveUp = function() {
    return finish(null);
  };

  return shred.get({
    url: url,
    on: {
      // On a redirect there is no body to inspect, so only URL-based
      // discovery is attempted.
      redirect: function(response) {
        return discoverImage(response.request.url, null, finish);
      },
      200: function(response) {
        return discoverImage(response.request.url, response.content.data, finish);
      },
      // Catch-all for every other status so the caller is always answered
      // instead of waiting forever.
      // NOTE(review): relies on Shred's `response` event firing when no more
      // specific handler matches - confirm against the Shred version in use.
      response: giveUp,
      // NOTE(review): presumably emitted by Shred on transport failures
      // (DNS, connection refused) - confirm the event name.
      request_error: giveUp
    }
  });
};
/**
 * Picks an image URL for a page and hands it to `cb`.
 *
 * The page URL itself is consulted first (discoverImageFromURL); the page
 * body is only inspected (discoverImageFromData) when a body was supplied and
 * the URL alone produced nothing. The winner is passed through cleanURL.
 *
 * @param {string} url - URL of the page.
 * @param {?string} data - Page body, or a falsy value when unavailable.
 * @param {function(?string)} cb - Receives the cleaned image URL or null.
 * @returns {*} The return value of `cb`.
 */
var discoverImage = function(url, data, cb) {
  var candidate = discoverImageFromURL(url);
  if (data && !candidate) {
    candidate = discoverImageFromData(url, data);
  }
  return cb(cleanURL(url, candidate));
};
/**
 * Turns a discovered image reference into an absolute URL.
 *
 * @param {string} baseURL - URL of the page the image was found on.
 * @param {?string} discoveredURL - Image reference as found (absolute,
 *   protocol-relative, root-relative or path-relative), or a falsy value.
 * @returns {?string} The absolute image URL, or null when nothing was
 *   discovered.
 */
var cleanURL = function(baseURL, discoveredURL) {
  if (!discoveredURL) {
    return null;
  }
  // Anything that already carries a scheme (http:, https:, data:, ...) is
  // returned untouched.
  if (/^[a-z][a-z0-9+.-]*:/i.test(discoveredURL)) {
    return discoveredURL;
  }
  // `URL` is the Node `url` module. Resolving against the page URL handles
  // "/root/relative", "//protocol-relative" and "path/relative" references;
  // plain concatenation produced e.g. "http://host/a/b.html/img/x.png".
  return URL.resolve(baseURL, discoveredURL);
};
/**
 * Derives an image URL from the page URL alone.
 *
 * When the URL contains the substring "yfrog", the image URL is the page URL
 * with ":medium" appended. The position of the substring is logged to the
 * console on every call.
 *
 * @param {string} url - URL of the page.
 * @returns {string|undefined} The derived image URL, or undefined when the
 *   URL does not contain "yfrog".
 */
var discoverImageFromURL = function(url) {
  var markerIndex = url.indexOf('yfrog');
  console.log(markerIndex);
  if (markerIndex === -1) {
    return undefined;
  }
  return url + ":medium";
};
/**
 * Finds the `src` of the largest <img> in a page body.
 *
 * "Largest" means the greatest product of the element's `width` and `height`
 * attributes. Images missing either attribute (or with non-numeric values)
 * yield NaN and never win; on ties the earliest image is kept.
 *
 * @param {string} url - URL of the page (not used by this function).
 * @param {string} data - Markup to load with cheerio.
 * @returns {string|undefined} The `src` of the largest image, or undefined
 *   when no image has a positive computed area.
 */
var discoverImageFromData = function(url, data) {
  var $ = cheerio.load(data);
  var winner = Array.prototype.reduce.call($('img'), function(best, element) {
    var node = $(element);
    var area = node.attr('width') * node.attr('height');
    // Strict comparison: NaN areas and ties leave the current best in place.
    return area > best.area ? { element: element, area: area } : best;
  }, { element: null, area: 0 });
  return $(winner.element).attr('src');
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment