Skip to content

Instantly share code, notes, and snippets.

@dearfrankg
Created November 28, 2012 03:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dearfrankg/4158831 to your computer and use it in GitHub Desktop.
Save dearfrankg/4158831 to your computer and use it in GitHub Desktop.
scraper = require 'scraper'
fs = require 'fs'
########################################################
## Shorthand for console.log: prints the single value it is given.
log = (msg) ->
  console.log msg
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
## Grabber loads a page through `scraper` and hands the resulting jQuery
## function to a callback. It also provides helpers to turn matched elements
## into plain records (scrapeArray) and to save text to disk (writeFile).
class Grabber

  ## Start scraping immediately.
  ##   url  - address passed to `scraper`
  ##   path - selector scrapeArray later uses to pick the root elements
  ##   cb   - function (jQuery) invoked with `this` bound to this Grabber
  ##          once the page is available
  ## An error reported by `scraper` is rethrown from inside its callback.
  constructor: (@url, @path, @cb) ->
    # Fat arrow keeps `this` pointing at the Grabber inside the callback.
    scraper @url, (err, jQuery) =>
      throw err if err
      @cb.call this, jQuery

  ## Build one record per element matching @path, then drop filtered records.
  ##   g      - array of { property, path, attr }; for each entry the record's
  ##            `property` is set from the sub-element found at `path`: its
  ##            html() when `attr` is undefined, otherwise that attribute
  ##   f      - array of { property, text }; a record is dropped when
  ##            record[property] matches `text` as a case-insensitive regex.
  ##            A missing `f` is treated as "no filters".
  ##   jQuery - the jQuery function for the page being scraped
  ## Returns the surviving records in the order each() visited them.
  scrapeArray: (g, f, jQuery) ->
    ## GATHER
    ##
    records = []
    jQuery(@path).each ->
      node = jQuery(this)
      record = {}
      for doc in g
        target = node.find(doc.path)
        record[doc.property] =
          if doc.attr is undefined then target.html() else target.attr(doc.attr)
      records.push record
      # Explicit return: each() stops iterating if a callback returns false.
      return

    ## FILTER
    ##
    # Compile every pattern once instead of once per record.
    matchers = for filter in (f ? [])
      { property: filter.property, regex: new RegExp(filter.text, 'i') }

    isFilteredOut = (record) ->
      for matcher in matchers
        value = record[matcher.property]
        # The value may be null/undefined (e.g. the sub-element or attribute
        # was absent). Such a value cannot match, so the record is kept
        # rather than crashing the whole scrape.
        continue unless value?
        return true if matcher.regex.test String(value)
      false

    (record for record in records when not isFilteredOut record)

  ## Asynchronously write `str` to `filename`; logs the error on failure or
  ## a confirmation message on success.
  writeFile: (filename, str) ->
    fs.writeFile filename, str, (err) ->
      if err
        console.log err
      else
        console.log "The file was saved!"
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
## Entry point: scrape the thefancy.com front page, follow each product's
## detail link, and save the collected image/price records to fancy.json.
new Grabber 'http://www.thefancy.com', 'li .figure-product', (jQuery) ->
  log 'Pulling data from http://thefancy.com'

  ##
  ## Get the captions and detailUrl
  ## (records whose caption matches 'fancy', case-insensitively, are dropped)
  ##
  data1 = @scrapeArray(
    [
      { property: 'caption', path: 'figcaption' }
      { property: 'detailUrl', path: 'a:eq(1)', attr: 'href' }
    ],
    [{ property: 'caption', text: 'fancy' }],
    jQuery
  )

  # With no products the completion check below would never fire; say so
  # instead of exiting silently.
  if data1.length is 0
    log 'No products found; nothing to write'
    return

  ##
  ## Get the bigImageUrl
  ##
  data2 = []
  pending = data1.length   # detail pages still outstanding
  baseUrl = @url

  data1.forEach (doc, index) ->
    new Grabber baseUrl + doc.detailUrl, '#container-wrapper', (jQuery) ->
      item = @scrapeArray(
        [
          { property: 'bigImageUrl', path: '.first .fig-image img', attr: 'src' }
          { property: 'price', path: '#sidebar .price' }
        ],
        [],
        jQuery
      )
      # Store by the product's own position so the output order follows
      # data1 rather than whichever request happens to finish first.
      data2[index] = item[0]
      pending--
      if pending is 0
        log 'DATA2'
        log 'SIZE: ' + data2.length
        log data2
        @writeFile 'fancy.json', JSON.stringify data2
      return
    return
  return
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment