Instantly share code, notes, and snippets.

Embed
What would you like to do?
The East (2013)
Shutter island
Stephen Fry: The Secret Life of the Manic Depressive
The Departed
Alan Partridge: Alpha Papa
Only God Forgives
You've got mail
The Draughtsman's Contract
The Deal
Winter's Bone
A World Apart (1988)
We need to talk about Kevin
# Bundler directives: resolve gems from rubygems.org and depend on Nokogiri,
# which the script below requires for HTML parsing.
# NOTE(review): presumably these two lines belong in a separate Gemfile rather
# than in the script itself — confirm against the original gist layout.
source 'https://rubygems.org'
gem 'nokogiri'
require 'nokogiri'
require 'open-uri'
# Metadata hashes (as returned by get_film_meta) for films found with a score.
@films = []
# Raw names from the film list that could not be resolved to a scored title.
@not_found = []
# Searches IMDb for +film_name+ and returns the href of the first search result
# when that result's title matches the requested name.
#
# The comparison is case-insensitive, ignores a "(1999)"-style year in
# +film_name+, and also accepts the name with "The " put in front of it.
#
# @param film_name [String] title as written in the film list
# @return [String, nil] href of the first result, or nil when there are no
#   results or the first result's title does not match
def get_film_path(film_name)
  # URI.escape was removed in Ruby 3.0. Form-encoding the query also keeps
  # characters such as '&' or '#' in a title from corrupting the search URL.
  film_search_url = "http://www.imdb.com/find?q=#{URI.encode_www_form_component(film_name)}&s=all"
  # URI.open is open-uri's entry point (Kernel#open stopped opening URLs in
  # Ruby 3.0); the block form closes the response once it has been parsed.
  doc = URI.open(film_search_url) { |page| Nokogiri::HTML(page) }
  first_result = doc.xpath('//*[@id="main"]/div/div[2]/table/tr/td[2]/a').first
  return unless first_result

  # Strip out a year such as "(1999)" before comparing titles.
  wanted = film_name.sub(/\(\d+\)/, '').strip
  title = first_result.content
  # Accept either the bare name or the name with a leading "The ".
  matched = [wanted, "The #{wanted}"].any? { |candidate| title.casecmp(candidate) == 0 }
  first_result.attr('href') if matched
end
# Fetches the IMDb page at +path+ and scrapes the film's headline details.
#
# @param path [String] site-relative href, e.g. one returned by get_film_path
# @return [Hash] :score => text of the rating element, or nil when absent;
#   :name => text of the title element, or nil when absent;
#   :genres => the nodes matched by the genre XPath (possibly empty)
def get_film_meta(path)
  # URI.escape was removed in Ruby 3.0; the RFC 2396 parser performs the same
  # escaping that URI.escape used to delegate to.
  film_full_url = URI::RFC2396_Parser.new.escape('http://www.imdb.com' + path)
  # URI.open is open-uri's entry point (Kernel#open stopped opening URLs in
  # Ruby 3.0); the block form closes the response once it has been parsed.
  doc = URI.open(film_full_url) { |page| Nokogiri::HTML(page) }
  score = doc.xpath('//*[@id="overview-top"]/div[3]/div[1]').first
  name = doc.xpath('//*[@id="overview-top"]/h1/span[1]').first
  {
    :score => score ? score.content : nil,
    # Tolerate a missing title heading the same way a missing score is
    # tolerated, instead of raising NoMethodError on nil.
    :name => name ? name.content : nil,
    :genres => doc.xpath('//span[@itemprop="genre"]')
  }
end
# Searches IMDb for +film_name+ and lists the titles of all search results.
#
# @param film_name [String] title to search for
# @return [Array<String>] text of every result link, empty when none matched
def find_alternatives(film_name)
  # URI.escape was removed in Ruby 3.0. Form-encoding the query also keeps
  # characters such as '&' or '#' in a title from corrupting the search URL.
  film_search_url = "http://www.imdb.com/find?q=#{URI.encode_www_form_component(film_name)}&s=all"
  # URI.open is open-uri's entry point (Kernel#open stopped opening URLs in
  # Ruby 3.0); the block form closes the response once it has been parsed.
  doc = URI.open(film_search_url) { |page| Nokogiri::HTML(page) }
  doc.xpath('//*[@id="main"]/div/div[2]/table/tr/td[2]/a').map(&:content)
end
# Reports that +film_name+ could not be resolved and records it in @not_found.
#
# Prints a blank line, the "didn't find" message, one line per alternative
# title returned by find_alternatives, and a closing blank line.
#
# @param film_name [String] the name as it appeared in the film list
# @return [Array<String>] the updated @not_found list
def film_not_found(film_name)
  alternatives = find_alternatives(film_name)
  # A bare `p` prints nothing, so `puts` is used for the blank separator lines;
  # `puts` also keeps the message free of the quotes `p` would wrap around it.
  puts
  puts " Didn't find '#{film_name}', did you mean:"
  alternatives.each { |film| puts " #{film}" }
  puts
  @not_found.push film_name
end
# Main script: look up every title listed in the 'film_list' file (one per
# line), collect the metadata of films that have a score in @films, report the
# rest through film_not_found, then print the results ranked by score.
File.readlines('film_list').each do |raw_film_name|
  raw_film_name.strip!
  # A blank line would otherwise be sent to IMDb as an empty search.
  next if raw_film_name.empty?

  path = get_film_path(raw_film_name)
  # Only fetch metadata if the path is for a film title.
  meta = get_film_meta(path) if path && path.include?('/title/')
  p "#{meta[:name]}: #{meta[:score]}" if meta

  if meta && meta[:score]
    @films.push meta
  else
    film_not_found raw_film_name
  end
end
puts
puts
puts "<<<< RESULTS >>>> "
# Scores are scraped as text, so rank them by the first number they contain
# ("10.0" must beat "9.5"); the raw text breaks ties and keeps order stable.
@films.sort_by! do |film|
  score_text = film[:score].to_s
  [score_text[/\d+(?:\.\d+)?/].to_f, score_text]
end
@films.reverse_each do |film|
  # String#+ relies on each genre entry converting implicitly (to_str) to a
  # String, so interpolation (which calls to_s) is deliberately not used here.
  genres_string = (film[:genres] || []).map { |genre| '#' + genre }.join(' ')
  p "#{film[:name]}: #{film[:score]} #{genres_string}"
end
puts
puts
puts "=== Not Found... ==="
@not_found.each { |film| p film }
puts
puts
puts "=== FINISHED === "
@iandundas

This comment has been minimized.

Owner

iandundas commented Feb 21, 2014

It's a quickly hacked-together script, but it works as of today (it will probably break when IMDb changes its site layout).

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment