Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Crawls IMDb's per-year pages to list the most popular movies of the last 50 years (1962–2012) with their IMDb IDs and genres. Run with CasperJS.
# Queue a CasperJS step that opens IMDb's listing page for `year` and prints
# one line per movie found on it.
#
# year - value interpolated into the IMDb URL (the caller passes integers).
#
# Uses the file-level `casper` instance. The scraping callback runs through
# `$`, so jQuery must be present in the page. Each movie is printed as
# "<name> -> <imdb id> | <genres>" under a "========= <year> =========" banner.
listTopMovies = (year) ->
  url = "http://www.imdb.com/year/#{ year }/"
  casper.then ->
    casper.open(url).then ->
      console.log ''
      console.log "========= #{ year } ========="
      # The function handed to evaluate executes inside the loaded page, so it
      # builds and returns its own plain array instead of touching outer state.
      pageMovies = @evaluate ->
        movies = []
        $('table.results tr.detailed td.title > a').each ->
          # Assumes hrefs of the form "/title/<id>/"; stripping the prefix and
          # the first remaining slash leaves the bare id.
          imdbid = $(@).attr('href').replace('/title/', '').replace('/', '')
          name = $(@).text()
          genres = $(@).parent().find('span.genre').first().text()
          movies.push { imdb: imdbid, name: name, genres: genres }
        movies
      # evaluate may come back null/undefined (e.g. the page did not load or
      # jQuery was not injected); fall back to an empty list so one bad year
      # does not abort the whole crawl with a TypeError.
      pageMovies ?= []
      for movie in pageMovies
        console.log "#{ movie.name } -> #{ movie.imdb } | #{ movie.genres }"
# Shared CasperJS instance used by every step in this script.
# jquery.js is listed under clientScripts so the page-side scraping code can
# call `$`.
casper = require('casper').create({
  # verbose: true
  # logLevel: "debug"
  clientScripts: ["jquery.js"]
})
# Once the run starts, schedule one listing step per year, in ascending order.
casper.start().then ->
  firstYear = 1962
  lastYear = 2012
  # Inclusive range: both end years are crawled.
  listTopMovies year for year in [firstYear..lastYear]
# Execute the queued steps; once they have all run, report completion and exit.
casper.run ->
  this.echo('Completed')
  this.exit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment