Skip to content

Instantly share code, notes, and snippets.

@lekevicius
Created March 14, 2013 13:45
Show Gist options
  • Save lekevicius/5161415 to your computer and use it in GitHub Desktop.
Save lekevicius/5161415 to your computer and use it in GitHub Desktop.
Crawls IMDb to list the most popular movies of each year from 1962 to 2012, with their IMDb IDs and genres. Run with CasperJS.
# Queues a CasperJS step that opens the IMDb listing page for `year`, scrapes
# the movie links found in the results table, and prints them to the console.
#
# year - value interpolated into the URL http://www.imdb.com/year/<year>/
#
# Prints a "========= <year> =========" header followed by one line per movie
# in the form "<name> -> <imdb id> | <genres>". Relies on the file-level
# `casper` instance existing by the time this function is called.
listTopMovies = (year) ->
  url = "http://www.imdb.com/year/#{ year }/"
  casper.then ->
    casper.open(url).then ->
      console.log ''
      console.log "========= #{ year } ========="

      # The function below executes inside the loaded page, so it can only use
      # what the page provides; `$` is expected to be jQuery there.
      pageMovies = @evaluate ->
        movies = []
        $('table.results tr.detailed td.title > a').each ->
          # Presumably the href has the form "/title/<id>/"; removing the
          # prefix and the first remaining slash leaves the bare ID.
          imdbid = $(@).attr('href').replace('/title/', '').replace('/', '')
          name = $(@).text()
          genres = $(@).parent().find('span.genre').first().text()
          movies.push { imdb: imdbid, name: name, genres: genres }
        movies

      # evaluate may hand back null when the in-page function fails (e.g. the
      # page did not load or `$` is missing); iterating null would throw.
      unless pageMovies?
        console.log "No movie data could be read from #{ url }"
        return

      for movie in pageMovies
        console.log "#{ movie.name } -> #{ movie.imdb } | #{ movie.genres }"
      # Explicit return so the loop is not collected into a throwaway array.
      return
# Shared CasperJS instance used by every crawl step in this script.
# "jquery.js" is registered as a client script because the scraping code that
# runs inside each page uses `$`.
casper = require('casper').create({
  # Troubleshooting options, disabled by default:
  # verbose: true
  # logLevel: "debug"
  clientScripts: ["jquery.js"]
})
# Entry point: the first step queues one listing crawl per year, 1962 through
# 2012 inclusive.
casper.start().then ->
  listTopMovies(year) for year in [1962..2012]

# Run the queued steps; once they are all done, print a notice and exit.
casper.run ->
  this.echo('Completed')
  this.exit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment