Skip to content

Instantly share code, notes, and snippets.

@pdxmph
Created September 13, 2011 16:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pdxmph/1214303 to your computer and use it in GitHub Desktop.
Save pdxmph/1214303 to your computer and use it in GitHub Desktop.
Clean up twitter handles
#!/usr/bin/env ruby
require "rubygems"
require "active_record"
require "uri"
require "yaml"
# Reports, for every site listed in ~/etc/smt_config.yml, which format each
# stored Twitter value is in and what it would be normalized to, followed by a
# per-site tally. This is a dry run: the proposed handle is only printed; no
# record is modified or saved.

# Model over the profile table. Defined once at the top level instead of being
# reopened on every pass through the site loop.
class Profile < ActiveRecord::Base
  self.table_name = "content_type_profile"
end

# Classifies a raw Twitter field value and proposes a normalized handle.
#
# twitter - non-nil String read from field_twitter_value.
#
# Returns a two-element Array [label, handle]. label is the tally key for the
# detected format. handle is the proposed "@name" String, or nil when the value
# already starts with "@" followed by a word character and needs no conversion.
# URI.parse may raise (e.g. URI::InvalidURIError) for URL-format values it
# cannot parse; the caller rescues and reports that.
def classify_twitter(twitter)
  lowered = twitter.downcase
  if lowered =~ /^(www\.|)twitter\.com/
    # Bare "twitter.com/name" or "www.twitter.com/name" without a scheme.
    ["non-URI URL", "@" + lowered.sub(/.+?witter\.com\//, "")]
  elsif lowered =~ /^http(s:|:).+twitter\.com\/\#\!/
    # Strip everything through the hashbang and its optional slash. The
    # original replaced the prefix with "\0" (a NUL byte inside double quotes),
    # which left a NUL character embedded in the resulting handle.
    ["hashbang", "@" + twitter.sub(/^.+?\#!\/?/, "").downcase]
  elsif twitter =~ /^@\w{1,}/
    ["ok", nil]
  elsif lowered =~ /^http.+?twitter\.com\/\w+?$/
    ["URL", "@" + URI.parse(twitter).path.gsub(/^\//, "").downcase]
  else
    ["non-@ handle", "@#{lowered}"]
  end
end

# YAML.load_file opens and closes the file itself; the original File.open
# handle was never closed.
config = YAML.load_file(File.expand_path("~/etc/smt_config.yml"))
sites = config["sites"]

sites.each do |site_name, site_settings|
  puts "*** #{site_name}"

  # Point ActiveRecord at this site's staging database.
  ActiveRecord::Base.establish_connection(
    :adapter  => "mysql",
    :host     => "127.0.0.1",
    :port     => 3334,
    :username => "",
    :password => "",
    :database => "stage_#{site_settings['db']}"
  )

  tally = Hash.new(0)

  Profile.all.each do |profile|
    twitter = profile.field_twitter_value
    # Skip missing values; blank strings would otherwise be "converted" to a
    # bare "@".
    next if twitter.nil? || twitter.strip.empty?

    begin
      label, new_twitter = classify_twitter(twitter)
      if new_twitter
        puts "#{twitter} is in #{label} format. Converting to => #{new_twitter}"
      else
        puts "#{twitter} is in @handle format"
      end
      # Count after classification succeeded. The original placed `next` before
      # the "ok" tally, so well-formed handles were never counted or reported.
      tally[label] += 1
    rescue => ex
      # Best effort: report the problem and move on to the next profile.
      puts ex.message
    end
  end

  puts "\n\n\n*** Tally for #{site_name}"
  tally.each do |label, count|
    puts "#{label}: #{count}"
  end
  puts "\n\n-----------------------------------\n\n"
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment