Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Extract largest image thumbnail from url using node and cheerio
cheerio = require('cheerio')
Shred = require('shred')
shred = new Shred()
http = require('http')
URL = require('url')
# HTTP front end. A request carrying a `url` query parameter is answered with
# the discovered image URL as a JSON string (200), or with 404 when nothing
# was found. Requests without a usable `url` parameter are rejected with 400.
server = http.createServer (request, response) ->
  urlToDiscover = URL.parse(request.url, true).query['url']

  # A repeated parameter (?url=a&url=b) is parsed into an array; use the first.
  urlToDiscover = urlToDiscover[0] if Array.isArray(urlToDiscover)

  # Nothing to fetch: answer immediately instead of handing `undefined`
  # to the discovery pipeline.
  unless urlToDiscover
    response.writeHead(400, "Content-Type": 'text/plain')
    response.end("Missing required 'url' query parameter")
    return

  # NOTE(review): the target URL is caller-supplied and fetched as-is;
  # consider restricting schemes/hosts if this is exposed publicly.
  startDiscovery urlToDiscover, (theImageURL) ->
    if theImageURL?
      response.writeHead(200, "Content-Type": 'application/json')
      response.end(JSON.stringify(theImageURL))
    else
      console.log("Did not find image for #{urlToDiscover}")
      response.writeHead(404, "Cant find theImage")
      response.end()

# Listen on the port given by the environment, falling back to 5000.
server.listen(process.env.PORT || 5000, '0.0.0.0')
# Fetches `url` and hands the outcome to discoverImage, which eventually calls
# `cb` with the discovered image URL (or null).
#
# url - address of the page to inspect
# cb  - callback receiving the discovered image URL, or null when none
#
# Every outcome reports back through `cb`; previously only redirects and 200
# responses did, so any other status or a transport failure left the caller
# waiting forever.
startDiscovery = (url, cb) ->
  return cb(null) unless url

  shred.get
    url: url
    on:
      # On a redirect only the URL itself is inspected; there is no body.
      redirect: (response) ->
        discoverImage(response.request.url, null, cb)
      200: (response) ->
        discoverImage(response.request.url, response.content.data, cb)
      # Shred's catch-all for responses with no more specific handler
      # (4xx/5xx and other non-200 statuses) - see the Shred README.
      response: (response) ->
        cb(null)
      # Transport-level failure (DNS, refused connection, ...) as emitted
      # by Shred - TODO confirm event name against the installed version.
      request_error: (error) ->
        cb(null)
# Picks an image URL for a page and passes it to `cb`.
#
# url  - address of the page
# data - page body, or a falsy value when no body is available
# cb   - receives the cleaned image URL (null when nothing was found)
#
# The URL-based lookup takes precedence; the page body is only consulted
# when that lookup produced nothing and a body is present.
discoverImage = (url, data, cb) ->
  candidate = discoverImageFromURL(url)
  candidate = discoverImageFromData(url, data) if data and not candidate
  cb(cleanURL(url, candidate))
# Turns a discovered image reference into an absolute URL.
#
# baseURL       - URL of the page the reference was found on
# discoveredURL - the reference: absolute, protocol-relative ("//host/x"),
#                 root-relative ("/x") or document-relative ("x")
#
# Returns null when there is no reference, the reference unchanged when it
# already carries a scheme, and otherwise the reference resolved against
# baseURL.
cleanURL = (baseURL, discoveredURL) ->
  return null unless discoveredURL

  # Already absolute (http:, https:, data:, ...): leave untouched.
  return discoveredURL if /^[a-z][a-z0-9+.-]*:/i.test(discoveredURL)

  # Plain concatenation glued "/img/x.png" onto the full page URL
  # ("http://host/a/b.html/img/x.png"); resolving honours the base's
  # origin and path the way a browser would.
  URL.resolve(baseURL, discoveredURL)
# Derives an image URL from the page URL alone, for URLs that follow a
# known pattern.
#
# url - address of the page
#
# Returns `url` with ":medium" appended when it contains "yfrog"; otherwise
# undefined (also for a missing or non-string url, which previously threw).
discoverImageFromURL = (url) ->
  return undefined unless typeof url is 'string'
  return "#{url}:medium" if url.indexOf('yfrog') isnt -1
  undefined
# Scans page markup for the <img> with the largest declared area.
#
# url  - address of the page (not used by this function)
# data - markup to parse with cheerio
#
# Returns the `src` attribute of the winning image, or whatever cheerio
# yields for an empty selection when no image declares a positive area.
discoverImageFromData = (url, data) ->
  $ = cheerio.load(data)
  largest = null
  largestArea = 0

  for candidate in $('img')
    node = $(candidate)
    # Attribute values are multiplied directly; a missing or non-numeric
    # width/height gives NaN, which never compares greater and is skipped.
    area = node.attr('width') * node.attr('height')
    continue unless area > largestArea
    largestArea = area
    largest = candidate

  return $(largest).attr('src')
@fizerkhan
Copy link

fizerkhan commented May 23, 2013

Very nice.
If the width and height are specified in the style attribute or in CSS, how can they be read?

Loading

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment