dearfrankg/gist:4158831

## gistfile1.coffee

scraper = require 'scraper'
fs = require 'fs'

########################################################
## Shorthand for console.log: prints one value to stdout.
log = (msg) ->
  console.log msg


#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

# Grabber loads a page through `scraper` and hands the resulting jQuery
# function to a user callback that runs with the Grabber instance as `this`.
class Grabber

  # url  - address handed to `scraper`
  # path - selector scrapeArray uses to pick the elements it iterates over
  # cb   - invoked as cb.call(grabber, jQuery) once `scraper` calls back
  # An error reported by `scraper` is rethrown from inside its callback.
  constructor: (@url, @path, @cb) ->
    # Fat arrow keeps `this` bound to the instance inside the callback.
    scraper @url, (err, jQuery) =>
      throw err if err
      @cb.call this, jQuery

  # Builds one record per element matching @path, then drops filtered records.
  #
  # g      - list of { property, path, attr? }: for each entry the record gets
  #          record[property] = attr(attr) of the sub-element at `path` when
  #          `attr` is given, otherwise that sub-element's html()
  # f      - list of { property, text }: a record is dropped when
  #          record[property] matches `text` (case-insensitive regex)
  # jQuery - the jQuery function for the loaded page
  #
  # Returns the array of records that survived every filter.
  scrapeArray: (g, f, jQuery) ->

    ## GATHER
    ##

    data = []
    jQuery(@path).each ->
      node = jQuery(this)
      record = {}
      for doc in (g ? [])
        found = node.find(doc.path)
        record[doc.property] = if doc.attr is undefined then found.html() else found.attr(doc.attr)
      data.push record
      # Explicit empty return: jQuery.each stops when a callback returns false.
      return

    ## FILTER
    ##

    # Compile every filter once instead of once per record.
    matchers = ({ property: flt.property, regex: new RegExp(flt.text, 'i') } for flt in (f ? []))

    filterOut = (item) ->
      for matcher in matchers
        value = item[matcher.property]
        # html()/attr() give null/undefined when nothing was found; that is
        # "no match", not a reason to crash on a missing .match method.
        continue unless value?
        return true if matcher.regex.test String(value)
      false

    data.filter (item) -> not filterOut item

  # Writes `str` to `filename` asynchronously and logs the outcome.
  writeFile: (filename, str) ->
    fs.writeFile filename, str, (err) ->
      if err
        console.log err
      else
        console.log "The file was saved!"


#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

# Scrape the thefancy.com front page, follow every product's detail link, and
# save the records scraped from the detail pages to fancy.json.
new Grabber( 'http://www.thefancy.com', 'li .figure-product', (jQuery) ->

  log 'Pulling data from http://thefancy.com'

  ##
  ## Get the captions and detailUrl
  ##
  # Records whose caption matches /fancy/i are dropped by the filter list.

  data1 = @scrapeArray(
    [
      { property: 'caption', path: 'figcaption' }
      { property: 'detailUrl', path: 'a:eq(1)', attr: 'href' }
    ],
    [{ property: 'caption', text: 'fancy' }],
    jQuery
  )

  # With nothing to fetch the completion check below would never fire,
  # so report it instead of ending silently.
  if data1.length is 0
    log 'No products found'
    return

  ##
  ## Get the bigImageUrl
  ##

  data2 = []
  pending = data1.length
  baseUrl = @url

  data1.forEach (doc, index) ->
    new Grabber( baseUrl + doc.detailUrl, '#container-wrapper', (jQuery) ->

      item = @scrapeArray(
        [
          { property: 'bigImageUrl', path: '.first .fig-image img', attr: 'src' }
          { property: 'price', path: '#sidebar .price' }
        ],
        [],
        jQuery
      )

      # Store by the product's own index so the output order matches the
      # listing order no matter which request finishes first.
      data2[index] = item[0]
      pending--

      if pending is 0
        log 'DATA2'
        log 'SIZE: ' + data2.length
        log data2

        @writeFile 'fancy.json', JSON.stringify data2
      return
    )
    return
  return
)

	scraper = require 'scraper'
	fs = require 'fs'

	########################################################
	## Shorthand for console.log: prints one value to stdout.
	log = (msg) ->
		console.log msg


	#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
	#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

	# Grabber loads a page through `scraper` and hands the resulting jQuery
	# function to a user callback that runs with the Grabber instance as `this`.
	class Grabber

		# url  - address handed to `scraper`
		# path - selector scrapeArray uses to pick the elements it iterates over
		# cb   - invoked as cb.call(grabber, jQuery) once `scraper` calls back
		# An error reported by `scraper` is rethrown from inside its callback.
		constructor: (@url, @path, @cb) ->
			# Fat arrow keeps `this` bound to the instance inside the callback.
			scraper @url, (err, jQuery) =>
				throw err if err
				@cb.call this, jQuery

		# Builds one record per element matching @path, then drops filtered records.
		#
		# g      - list of { property, path, attr? }: for each entry the record gets
		#          record[property] = attr(attr) of the sub-element at `path` when
		#          `attr` is given, otherwise that sub-element's html()
		# f      - list of { property, text }: a record is dropped when
		#          record[property] matches `text` (case-insensitive regex)
		# jQuery - the jQuery function for the loaded page
		#
		# Returns the array of records that survived every filter.
		scrapeArray: (g, f, jQuery) ->

			## GATHER
			##

			data = []
			jQuery(@path).each ->
				node = jQuery(this)
				record = {}
				for doc in (g ? [])
					found = node.find(doc.path)
					record[doc.property] = if doc.attr is undefined then found.html() else found.attr(doc.attr)
				data.push record
				# Explicit empty return: jQuery.each stops when a callback returns false.
				return

			## FILTER
			##

			# Compile every filter once instead of once per record.
			matchers = ({ property: flt.property, regex: new RegExp(flt.text, 'i') } for flt in (f ? []))

			filterOut = (item) ->
				for matcher in matchers
					value = item[matcher.property]
					# html()/attr() give null/undefined when nothing was found; that is
					# "no match", not a reason to crash on a missing .match method.
					continue unless value?
					return true if matcher.regex.test String(value)
				false

			data.filter (item) -> not filterOut item

		# Writes `str` to `filename` asynchronously and logs the outcome.
		writeFile: (filename, str) ->
			fs.writeFile filename, str, (err) ->
				if err
					console.log err
				else
					console.log "The file was saved!"



	#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
	#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

	# Scrape the thefancy.com front page, follow every product's detail link, and
	# save the records scraped from the detail pages to fancy.json.
	new Grabber( 'http://www.thefancy.com', 'li .figure-product', (jQuery) ->

		log 'Pulling data from http://thefancy.com'

		##
		## Get the captions and detailUrl
		##
		# Records whose caption matches /fancy/i are dropped by the filter list.

		data1 = @scrapeArray(
			[
				{ property: 'caption', path: 'figcaption' }
				{ property: 'detailUrl', path: 'a:eq(1)', attr: 'href' }
			],
			[{ property: 'caption', text: 'fancy' }],
			jQuery
		)

		# With nothing to fetch the completion check below would never fire,
		# so report it instead of ending silently.
		if data1.length is 0
			log 'No products found'
			return

		##
		## Get the bigImageUrl
		##

		data2 = []
		pending = data1.length
		baseUrl = @url

		data1.forEach (doc, index) ->
			new Grabber( baseUrl + doc.detailUrl, '#container-wrapper', (jQuery) ->

				item = @scrapeArray(
					[
						{ property: 'bigImageUrl', path: '.first .fig-image img', attr: 'src' }
						{ property: 'price', path: '#sidebar .price' }
					],
					[],
					jQuery
				)

				# Store by the product's own index so the output order matches the
				# listing order no matter which request finishes first.
				data2[index] = item[0]
				pending--

				if pending is 0
					log 'DATA2'
					log 'SIZE: ' + data2.length
					log data2

					@writeFile 'fancy.json', JSON.stringify data2
				return
			)
			return
		return
	)