Skip to content

Instantly share code, notes, and snippets.

@kastiglione
Created May 18, 2013 03:59
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kastiglione/5603188 to your computer and use it in GitHub Desktop.
Save kastiglione/5603188 to your computer and use it in GitHub Desktop.
#!/usr/bin/env casperjs
#
# Example: ./github-users.coffee Greenville
#
{Casper} = require 'casper'
# Casper subclass that walks GitHub's user-search results for the location
# given as the first command-line argument, logging the link text of every
# user entry and following the "next page" link until a page has no entries
# or no such link.
class GitHubUserScraper extends Casper
  # User agent string sent with every page request.
  USER_AGENT: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_5) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11'

  # options - Casper options object (optional). `pageSettings.userAgent` is
  #           always forced to USER_AGENT; any other page settings supplied
  #           by the caller are preserved instead of being discarded.
  constructor: (options={}) ->
    options.pageSettings ?= {}
    options.pageSettings.userAgent = @USER_AGENT
    super options

  # Collects `property` from every element matching `selector` on the
  # current page.
  #
  # selector - CSS selector evaluated in the page context.
  # property - name of the element property to read (e.g. 'innerText').
  #
  # Returns an array of property values; an empty array when the page-side
  # evaluation yields nothing (for example, when it fails and returns null).
  scrapeAll: (selector, property) ->
    # `query` is executed inside the page by @evaluate, so it may only use
    # its own arguments and the page-side `__utils__` helper.
    query = (selector, property) ->
      Array::map.call __utils__.findAll(selector), (object) -> object[property]
    (@evaluate query, selector, property) or []

  # Opens the first search-results page and schedules scraping of it.
  crawlUsers: ->
    @start 'https://github.com/' + @path()
    @then => @scrapeUsersAndCrawlNextPage()

  # Logs every user found on the current page, then, after a randomized
  # pause, clicks through to the next page and repeats. Stops when the page
  # lists no users or has no 'a.next_page' link.
  scrapeUsersAndCrawlNextPage: ->
    users = @scrapeUsers()
    return if users.length == 0
    users.forEach (user) -> console.log user
    # Be gentle on GitHub and avoid rate limiting
    @waitBetween 10, 15, =>
      return unless @exists 'a.next_page'
      @thenClick 'a.next_page'
      @then => @scrapeUsersAndCrawlNextPage()

  # Returns the link text of each user entry on the current results page.
  scrapeUsers: ->
    @scrapeAll '#user_search_results .user-list-info > a', 'innerText'

  # Builds the search path (relative to https://github.com/) for the
  # location passed as the first positional command-line argument, with
  # results sorted by followers.
  #
  # The location is percent-encoded so that values containing spaces, '&',
  # '#' or non-ASCII characters cannot break or alter the query string.
  # NOTE(review): a missing argument is not rejected here and is searched
  # for as the literal text "undefined" — confirm whether a usage error is
  # preferable.
  path: ->
    location = encodeURIComponent @cli.args[0]
    "search?q=location%3A#{location}&type=Users&s=followers"

  # Waits a random duration between `min` and `max` seconds, then runs
  # `next`.
  #
  # min  - lower bound of the pause, in seconds.
  # max  - upper bound of the pause, in seconds.
  # next - callback handed to @wait to run once the pause has elapsed.
  waitBetween: (min, max, next) ->
    range = max - min
    # @wait takes milliseconds, hence the conversion from seconds.
    duration = (min + (range * Math.random())) * 1000
    @wait duration, next
### Main ###
# Build the scraper, queue the crawl steps, then start executing them.
crawler = new GitHubUserScraper()
crawler.crawlUsers()
crawler.run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment