Skip to content

Instantly share code, notes, and snippets.

@mrjjwright
Created August 2, 2012 19:41
Show Gist options
  • Save mrjjwright/3240020 to your computer and use it in GitHub Desktop.
Save mrjjwright/3240020 to your computer and use it in GitHub Desktop.
Extract largest image thumbnail from url using node and cheerio
cheerio = require('cheerio')
Shred = require('shred')
shred = new Shred()
http = require('http')
URL = require('url')
# HTTP front end. A request of the form /?url=<address> triggers image
# discovery for <address> and is answered with:
#   200 + the JSON-encoded image URL when one was found,
#   404 when discovery produced nothing,
#   400 when the `url` query parameter is missing or not a single string.
# NOTE(review): the server fetches whatever address the caller supplies;
# confirm that is acceptable for the environment this is deployed in.
server = http.createServer (request, response) ->
  url = URL.parse(request.url, true)
  urlToDiscover = url.query['url']
  # A repeated `url` parameter parses to an array and an absent one to
  # undefined; neither is something the fetcher can work with.
  unless typeof urlToDiscover is 'string' and urlToDiscover.length > 0
    response.writeHead(400, "Missing url parameter")
    response.end()
    return
  startDiscovery urlToDiscover, (theImageURL) ->
    if theImageURL?
      response.writeHead(200, "Content-Type": 'application/json')
      response.end(JSON.stringify(theImageURL))
    else
      console.log("Did not find image for #{urlToDiscover}")
      response.writeHead(404, "Cant find theImage")
      response.end()
# Listen on every interface; the port comes from the environment, else 5000.
server.listen(process.env.PORT || 5000, '0.0.0.0')
# Fetch `url` and report the discovered image URL through `cb`.
#
# url - Address of the page (or image service link) to inspect.
# cb  - Function called exactly once with the image URL, or with null when
#       nothing was found or the fetch did not end in a usable response.
#
# NOTE(review): the handler names below (`redirect`, `response`,
# `request_error`, `timeout`) and the assumption that Shred keeps following a
# redirect after emitting `redirect` come from Shred's README; confirm them
# against the installed Shred version.
startDiscovery = (url, cb) ->
  finished = false
  # Several handlers can fire for one fetch (redirect, then the final
  # response); make sure the caller only ever hears back once.
  finish = (imageURL) ->
    return if finished
    finished = true
    cb(imageURL)
  shred.get
    url: url
    on:
      redirect: (response) ->
        # Only the URL is known at this point. Report early when the URL alone
        # identifies the image; otherwise wait for the followed response.
        discoverImage response.request.url, null, (imageURL) ->
          finish(imageURL) if imageURL
      200: (response) ->
        discoverImage(response.request.url, response.content.data, finish)
      # Any other status: nothing to inspect, but the caller must not hang.
      response: -> finish(null)
      request_error: -> finish(null)
      timeout: -> finish(null)
# Work out the image URL for a fetched page and pass it to `cb`.
#
# url  - Address the content was fetched from.
# data - Page content, or a falsy value when only the URL is known.
# cb   - Receives the cleaned-up image URL (see cleanURL for its result).
#
# The URL-based lookup is tried first; the page content is only inspected
# when that lookup found nothing and content is available.
discoverImage = (url, data, cb) ->
  candidate = discoverImageFromURL(url)
  if data and not candidate
    candidate = discoverImageFromData(url, data)
  cb(cleanURL(url, candidate))
# Turn a discovered image reference into an absolute URL.
#
# baseURL       - URL of the page the reference was found on.
# discoveredURL - Image reference: absolute, protocol-relative ("//host/x"),
#                 root-relative ("/x") or page-relative ("x"). May be
#                 null, undefined or empty.
#
# Returns null when there is no reference, the reference unchanged when it
# already carries a scheme, otherwise the reference resolved against baseURL.
cleanURL = (baseURL, discoveredURL) ->
  return null unless discoveredURL
  # Already absolute (http:, https:, data:, ...): hand it back untouched.
  return discoveredURL if /^[a-z][a-z0-9+.\-]*:/i.test(discoveredURL)
  # Plain concatenation is only right when baseURL has no path; resolving
  # also covers protocol-relative and page-relative references.
  return URL.resolve(baseURL, discoveredURL)
# Derive an image URL from the page address alone, when possible.
#
# url - Address being inspected.
#
# Returns `url` with ":medium" appended when the address contains "yfrog"
# (presumably yfrog's medium-size rendition - TODO confirm), otherwise
# undefined. Non-string input also yields undefined.
discoverImageFromURL = (url) ->
  return undefined unless typeof url is 'string'
  if url.indexOf('yfrog') != -1
    return url + ":medium"
  return undefined
# Pick the largest <img> in an HTML document by its declared dimensions.
#
# url  - Address the document came from (not used here; kept for callers).
# data - HTML source handed to cheerio.
#
# Returns the `src` of the image whose width * height attributes give the
# largest area, or undefined when no image has both a `src` and usable
# positive dimensions.
discoverImageFromData = (url, data) ->
  $ = cheerio.load(data)
  # Accept "120" as well as "120px"; anything else (missing, "50%", ...)
  # becomes NaN so the image can never win the comparison below.
  parseDimension = (value) ->
    parts = /^\s*(\d+(?:\.\d+)?)\s*(?:px)?\s*$/i.exec(value ? '')
    if parts then parseFloat(parts[1]) else NaN
  bestSrc = undefined
  bestArea = 0
  for image in $('img')
    element = $(image)
    src = element.attr('src')
    # An image without a source is useless to the caller, however large.
    continue unless src
    area = parseDimension(element.attr('width')) * parseDimension(element.attr('height'))
    if area > bestArea
      bestArea = area
      bestSrc = src
  return bestSrc
@fizerkhan
Copy link

Very nice.
If the width and height are set in the style attribute or in CSS instead, how can they be read?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment