beastaugh/xkcd_titles.rb

## xkcd_titles.rb
#!/usr/bin/env ruby
require 'rubygems'
require 'hpricot'
require 'open-uri'

# Screen scraper for xkcd. The basic idea is to find out whether the comic
# always has the longest title of any image on the page. Inspired by this
# Hacker News posting: http://news.ycombinator.com/item?id=316621

# Prefix of the comic image URLs; an image whose src starts with it is
# treated as the comic itself.
comic_url = 'http://imgs.xkcd.com/comics/'
# Numbers of the comics whose longest-titled image is NOT the comic.
counter = []

(1..481).each do |i|
  begin
    # URI.open is the open-uri entry point; Kernel#open no longer accepts
    # URLs as of Ruby 3.0.
    doc = URI.open("http://xkcd.com/#{i}/") { |f| Hpricot(f) }

    # Only images carrying a title attribute take part in the comparison.
    # max_by picks the longest title directly instead of sorting everything.
    titled = doc.search('img').select { |e| e.has_attribute?('title') }
    longest = titled.max_by { |img| img.get_attribute('title').length }

    # A page without any titled image cannot be judged; say so explicitly
    # rather than letting a NoMethodError on nil fall into the rescue below.
    if longest.nil?
      puts "No titled images found for comic #{i}"
      next
    end

    # to_s guards against a titled image that has no src attribute; such an
    # image is simply not the comic.
    counter.push(i) unless longest.get_attribute('src').to_s.start_with?(comic_url)

    puts "Processed comic #{i}..."
  rescue StandardError => e
    # Best effort: report the failure (network error, HTTP error, parse
    # error, ...) with its cause and carry on with the next comic.
    puts "Something went wrong while trying to process comic #{i} (#{e.class}: #{e.message})"
  end
end

if counter.empty?
  puts 'The comic is always the image with the longest title on xkcd.'
else
  puts "The following comics don't have the longest title: #{counter.join(', ')}."
end
	#!/usr/bin/env ruby
	require 'rubygems'
	require 'hpricot'
	require 'open-uri'

	# Screen scraper for xkcd. The basic idea is to find out whether the comic
	# always has the longest title of any image on the page. Inspired by this
	# Hacker News posting: http://news.ycombinator.com/item?id=316621

	# Prefix of the comic image URLs; an image whose src starts with it is
	# treated as the comic itself.
	comic_url = 'http://imgs.xkcd.com/comics/'
	# Numbers of the comics whose longest-titled image is NOT the comic.
	counter = []

	(1..481).each do |i|
	  begin
	    # URI.open is the open-uri entry point; Kernel#open no longer accepts
	    # URLs as of Ruby 3.0.
	    doc = URI.open("http://xkcd.com/#{i}/") { |f| Hpricot(f) }

	    # Only images carrying a title attribute take part in the comparison.
	    # max_by picks the longest title directly instead of sorting everything.
	    titled = doc.search('img').select { |e| e.has_attribute?('title') }
	    longest = titled.max_by { |img| img.get_attribute('title').length }

	    # A page without any titled image cannot be judged; say so explicitly
	    # rather than letting a NoMethodError on nil fall into the rescue below.
	    if longest.nil?
	      puts "No titled images found for comic #{i}"
	      next
	    end

	    # to_s guards against a titled image that has no src attribute; such an
	    # image is simply not the comic.
	    counter.push(i) unless longest.get_attribute('src').to_s.start_with?(comic_url)

	    puts "Processed comic #{i}..."
	  rescue StandardError => e
	    # Best effort: report the failure (network error, HTTP error, parse
	    # error, ...) with its cause and carry on with the next comic.
	    puts "Something went wrong while trying to process comic #{i} (#{e.class}: #{e.message})"
	  end
	end

	if counter.empty?
	  puts 'The comic is always the image with the longest title on xkcd.'
	else
	  puts "The following comics don't have the longest title: #{counter.join(', ')}."
	end