# Back-fills <in_reply_to_screen_name> elements in a set of locally stored
# Twitter status archives (1.xml .. 76.xml). For every <status> that has a
# non-empty <in_reply_to_user_id> but no <in_reply_to_screen_name>, the screen
# name is resolved via username_lookup and inserted right after the user id
# element. Resolved names are cached in a YAML file so that each user id is
# fetched over the network at most once.
require "rubygems"
require "nokogiri"
require "open-uri"
require "yaml"

# YAML file holding the user id => screen name cache.
USERNAMES_FILE = "/home/tom/twitter_usernames.yml"
# Directory containing the numbered archive files.
ARCHIVE_DIR = "/home/tom/twitter_archive"
# Numbers of the archive files to process.
ARCHIVE_FILES = (1..76)

# Resolves a Twitter user id to a screen name, using +hash+ as a cache.
#
# On a cache miss the name is fetched from the remote users/show endpoint,
# progress is printed to stdout, the result is stored in +hash+ (mutating it),
# and the method sleeps for 30 seconds before returning.
#
# NOTE(review): the endpoint below is an unauthenticated legacy URL; confirm
# it is still reachable before relying on this script.
#
# @param val [String] user id to look up (used verbatim as the cache key)
# @param hash [Hash] cache mapping user ids to screen names; updated in place
# @return [String] the screen name for +val+
# @raise [NoMethodError] if the response contains no <screen_name> element
def username_lookup(val, hash)
  if hash[val].nil?
    print "-- #{val}"
    # URI.open instead of Kernel#open: open-uri no longer patches Kernel#open
    # on Ruby 3.0+. The block form closes the response body once it is read.
    response = URI.open("http://twitter.com/users/show.xml?user_id=#{val}", &:read)
    screenname = Nokogiri::XML(response).search("screen_name")[0].content.to_s
    print " = #{screenname}\n"
    hash[val] = screenname
    sleep 30 # so as not to exceed the Twitter API limit
  end
  hash[val]
end

# Load the cache once; an empty YAML file parses to false/nil, so fall back to
# an empty hash in that case.
hash = YAML.load_file(USERNAMES_FILE) || {}

ARCHIVE_FILES.each do |f|
  path = "#{ARCHIVE_DIR}/#{f}.xml"
  puts "Processing #{f}.xml"
  origarchive = Nokogiri::XML(File.read(path))

  # Select every status to patch first, then mutate, so that nodes inserted
  # below cannot influence which statuses are selected.
  pending = origarchive.search("status").select do |status|
    reply_to = status.search("in_reply_to_user_id")[0]
    # Statuses without the user id element are skipped rather than crashing.
    !reply_to.nil? && reply_to.content != "" &&
      status.search("in_reply_to_screen_name").empty?
  end

  pending.each do |status|
    reply_to = status.search("in_reply_to_user_id")[0]
    newnode = Nokogiri::XML::Node.new("in_reply_to_screen_name", origarchive)
    newnode.content = username_lookup(reply_to.content.to_s, hash)
    reply_to.add_next_sibling(newnode)
  end

  # Block forms guarantee the handles are flushed and closed. The archive is
  # fully parsed in memory by now, so truncating it for writing is safe.
  File.open(path, "w") { |out| origarchive.root.write_to(out) }
  # Persist the cache after every archive so finished lookups survive a
  # failure in a later file.
  File.open(USERNAMES_FILE, "w") { |out| YAML.dump(hash, out) }
end