Skip to content

Instantly share code, notes, and snippets.

@mkulke
Created January 13, 2014 22:32
Show Gist options
  • Save mkulke/8409399 to your computer and use it in GitHub Desktop.
Save mkulke/8409399 to your computer and use it in GitHub Desktop.
Scraping with CasperJS and a functional utility belt (Underscore.js)
_ = require 'underscore'
casper = require('casper').create()
utils = require('utils')
# Right-to-left currying for a two-argument function.
#
# `curry2(fn)(b)(a)` evaluates `fn(a, b)`: the LAST argument is supplied
# first, so the value being operated on can be supplied last. Any extra
# arguments handed to the intermediate functions are ignored, and `fn` is
# called without a receiver.
curry2 = (fn) -> (second) -> (first) -> fn(first, second)
# Right-to-left currying for a three-argument function.
#
# `curry3(fn)(c)(b)(a)` evaluates `fn(a, b, c)`: arguments are collected
# from last to first. Any extra arguments handed to the intermediate
# functions are ignored, and `fn` is called without a receiver.
curry3 = (fn) -> (third) -> (second) -> (first) -> fn(first, second, third)
# Index of the next result page to request; incremented by `dispatch`.
n = 0
# Release names collected so far, across every page scraped.
releases = []
# Known sites, keyed by the name accepted as the first CLI argument.
url =
  beep: "http://beep.com"
  meep: "http://meep.com"
# Site selected by the first positional CLI argument. A missing OR unknown
# name falls back to `beep`; without the `_.has` guard an unrecognised name
# would leave `link` undefined and the script would request "undefined".
type = casper.cli.get(0)
link = if _.has(url, type) then url[type] else url.beep
# lower(list) -> a copy of `list` with `toLowerCase()` applied to each item.
lower = curry2(_.invoke)('toLowerCase')
# Binding `call` to a prototype method yields a plain function that takes
# the receiver as its first argument: substr(str, start, length).
substr = _.bind(Function.prototype.call, String.prototype.substr)
# truncateTo80(str) -> substr(str, 0, 80), i.e. the first 80 characters.
truncateTo80 = curry3(substr)(80)(0)
# split(str, separator) as a free function (same trick as `substr`).
split = _.bind(Function.prototype.call, String.prototype.split)
splitByPeriod = curry2(split)('.')
splitByDash = curry2(split)('-')
# join(list, separator) as a free function (same trick as `substr`).
join = _.bind(Function.prototype.call, Array.prototype.join)
joinWithDash = curry2(join)('-')
# Scrapes the release names from the page currently loaded in casper.
#
# Must be called with the casper instance as `this` (it uses
# `@getElementsInfo`). Returns an array of strings: the text of every
# `#releases div span.release` element whose name has a `720p` token and no
# foreign-language token, each cut to its first 80 characters.
parse = ->
  languages = ['german', 'french', 'dutch', 'swedish', 'norwegian', 'nlsubbed', 'spanish', 'flemish', 'portuguese']

  # Drops the trailing "-<group>" segment so the group name is never matched
  # as a token. A name without any dash has no group segment and is kept
  # whole; blindly dropping the last segment would turn it into '' and the
  # release would be filtered out silently.
  removeGrp = (name) ->
    segments = splitByDash name
    if segments.length > 1 then joinWithDash(_.initial(segments)) else name

  # Lower-cased, period-separated tokens of a release name (group removed).
  tokenize = _.compose(lower, splitByPeriod, removeGrp)

  # contains(str) -> predicate telling whether a release name has `str`
  # (case-insensitively) as one of its tokens.
  contains = (str) ->
    wantedToken = str.toLowerCase()
    (name) -> _.contains tokenize(name), wantedToken

  is720p = contains '720p'

  # True when any token of the name is one of the (lower-case) `languages`.
  # The name is tokenized once rather than once per language.
  isForeign = (name) ->
    nameTokens = tokenize name
    _.some languages, (language) -> _.contains nameTokens, language

  elements = @getElementsInfo('#releases div span.release')
  _.chain(elements)
    .pluck('text')
    .filter(is720p)
    .reject(isForeign)
    .map(truncateTo80)
    .value()
# Queues a load of result page `n` (zero-based) of the selected site.
#
# Must be called with the casper instance as `this`. Page 0 is the bare
# `link`; any later page `n` is `link` followed by "/<n + 1>". Once the page
# is loaded, the releases parsed from it are appended to the file-level
# `releases` list.
start = (n) ->
  pagePath = if n is 0 then '' else "/#{n + 1}"
  collect = ->
    releases = releases.concat parse.call(this)
  @start "#{link}#{pagePath}", collect
# Completion callback that drives the scrape, one page per casper run.
#
# Must be called with the casper instance as `this`. While fewer than
# `wantedCount` releases have been collected and fewer than `maxPages` pages
# have been requested, it queues the next page and passes itself to `@run`
# again. Otherwise it prints up to `wantedCount` releases and exits.
dispatch = ->
  wantedCount = 24
  maxPages = 10
  # Strictly less-than: with exactly `wantedCount` releases in hand there is
  # nothing more to fetch (the previous `<=` requested one page too many).
  if releases.length < wantedCount && n < maxPages
    start.call(@, n++)
    @run dispatch
  else
    # `_.each` calls its iteratee with (element, index, list); wrap `@echo`
    # so only the text is passed and the index is not taken for a style.
    _.chain(releases).first(wantedCount).each (release) =>
      @echo release
    @exit()
# Entry point: initialise casper, then run with `dispatch` as the callback.
# NOTE(review): indentation was lost in this copy, so it is unclear whether
# `casper.run(dispatch)` sits inside the `then` callback or follows it at top
# level (leaving the callback empty) -- confirm against the original gist.
casper.start().then ->
casper.run(dispatch)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment