require 'rubygems'
require 'open-uri'
require 'hpricot'

# Fetches a blog post and prints every distinct Twitter-style "@handle"
# mentioned in its content paragraphs, one per line, in order of first
# appearance (without the leading "@").

url = "http://rinexus.com/blog/2009/01/rhode-island-twitter-sound"

# Parse the response body while the stream is still open; the block form of
# open closes the stream afterwards.
parse = lambda { |io| Hpricot(io) }

# Kernel#open stopped accepting URLs in Ruby 3.0. open-uri provides a public
# URI.open on newer interpreters; on older ones URI.open is not public, so
# fall back to the Kernel#open that open-uri patches.
doc = if URI.respond_to?(:open)
  URI.open(url, &parse)
else
  open(url, &parse)
end

twits = []
(doc/'.content/p').each do |paragraph|
  # String#each was removed in Ruby 1.9. Line splitting is unnecessary here:
  # \w+ never matches across a newline, so scanning the whole paragraph
  # yields the same captures in the same order.
  # NOTE(review): this also captures the text after "@" in things like
  # e-mail addresses, same as the original pattern.
  twits.concat(paragraph.inner_html.scan(/@(\w+)/).flatten)
end

twits.uniq!

twits.each do |twit|
  puts twit
end