Skip to content

Instantly share code, notes, and snippets.

@mildmojo
Last active December 20, 2015 08:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mildmojo/6100700 to your computer and use it in GitHub Desktop.
Script for pulling out all the Twitter haters blogged at http://gamerfury.tumblr.com (all sorts o' trigger warnings on that one).
#!/usr/bin/env ruby
#
# haters.rb
#
# Finds all the Twitter users posted to http://gamerfury.tumblr.com and prints
# them, newline-delimited, to STDOUT. Require this file in your own script to
# access ScumbagHaterFinder directly.
#
# Tested with Ruby 2.0.0.
#
# Example usage:
# $ ruby haters.rb --verbose
# hater1 https://twitter.com/hater1
# ...
#
# Example mass-block ('gem install t' for the lovely 't' CLI Twitter client)
# $ ruby haters.rb | xargs t block
#
require 'open-uri'
require 'json'
require 'csv'
require 'optparse'
require 'rss'
require 'rexml/document'
# ScumbagHaterFinder parses post descriptions for haters.
#
# Examples:
# # Just get post descriptions, no parsing.
# descriptions = ScumbagHaterFinder.new('http://gamerfury.tumblr.com').get_all
#
# # Provide a parser that pulls out the first '@username' in each description.
# haters = ScumbagHaterFinder.new('http://gamerfury.tumblr.com').get_all { |desc|
# desc.scan(/@(\S+)/).first
# }
#
class ScumbagHaterFinder < Struct.new(:base_url)
  # Walks every RSS page of the blog and collects one entry per post.
  #
  # parser - optional block given each post description String; should return
  #          a hater name String, or nil to skip the post.
  #
  # Returns an Array of Strings (nils and empty strings are dropped).
  def get_all &parser
    page_num = 0
    haters = []
    # By default, return whole description
    parser ||= lambda { |desc|
      desc
    }
    catch :no_more_pages do
      loop do
        page_num += 1
        # Page 1 lives at /rss; later pages at /page/N/rss.
        url = base_url + (page_num == 1 ? '/rss' : "/page/#{page_num}/rss")
        STDERR.puts "Fetching page #{page_num}..."
        page_haters = get_haters_from_rss(url, parser)
        # An empty page means we've walked off the end of the blog.
        throw :no_more_pages if page_haters.empty?
        haters.concat page_haters
      end
    end
    haters
  end
  ##############################################################################
  private
  ##############################################################################
  # Fetches one RSS page and maps each item description through the parser.
  #
  # Returns [] when the page can't be fetched or parsed, so get_all treats it
  # as "no more pages". (The original blanket `rescue` swallowed every error
  # and handed an empty StringIO to RSS::Parser, which chokes on non-XML
  # input instead of yielding an empty feed.)
  def get_haters_from_rss url, parser
    rss = begin
      # Kernel#open via open-uri; NOTE(review): switch to URI.open on
      # Ruby >= 2.5 (Kernel#open URL support was removed in Ruby 3.0).
      open(url)
    rescue OpenURI::HTTPError, SocketError, SystemCallError, Timeout::Error
      return []
    end
    feed = begin
      RSS::Parser.parse(rss)
    rescue RSS::Error
      nil
    end
    return [] unless feed
    page_haters = feed.items.map { |item|
      parser.call(item.description)
    }
    page_haters.compact.reject(&:empty?)
  end
end
# ScumbagHaterPresenter formats hater arrays into string representations.
#
# Example:
# haters = ScumbagHaterFinder.new('http://gamerfury.tumblr.com').get_all
# presenter = ScumbagHaterPresenter.new(haters)
#
# presenter.as_text
# => "hater1\nhater2\n"
#
# presenter.as_text_verbose
# => "hater1 https://twitter.com/hater1\n"
#
# presenter.as_csv
# => "hater1,https://twitter.com/hater1\nhater2,https://twitter.com/hater2\n"
#
# presenter.as_json
# => "{\"haters\":[[\"hater1\",\"https://twitter.com/hater1\"]]}"
#
class ScumbagHaterPresenter < Struct.new(:haters)
  # Newline-delimited hater names.
  def as_text
    haters.join("\n")
  end

  # One line per hater: left-padded name followed by profile URL.
  def as_text_verbose
    lines = verbose_haters.map do |name, url|
      format('%-25s %s', name, url)
    end
    lines.join("\n")
  end

  # CSV rows of name,profile-URL.
  def as_csv
    CSV.generate do |doc|
      verbose_haters.each { |row| doc << row }
    end
  end

  # JSON object of the form {"haters":[[name, url], ...]}.
  def as_json
    JSON.generate(haters: verbose_haters)
  end

  ##############################################################################
  private
  ##############################################################################

  # Pairs each hater name with its Twitter profile URL.
  def verbose_haters
    haters.map { |name| [name, "https://twitter.com/#{name}"] }
  end
end
if __FILE__ == $0
  options = {format: :as_text}

  # Command-line arg parsing.
  ARGV.options do |opts|
    opts.banner = "Usage: #{File.basename($PROGRAM_NAME)} [--text|--json|--csv] [--output <FILE>] [--verbose]"
    opts.separator ''
    opts.on( '-h', '--help', 'Show this help' ) { raise 'help' }
    opts.on( '-v', '--verbose', 'Verbose output' ) { options[:verbose] = true }
    opts.on( '-o', '--output <FILE>', 'Write output to a file' ) do |file|
      options[:output] = file
    end
    opts.on( '-t', '--text', 'Print hater list as newline-delimited text' ) do
      options[:format] = :as_text
    end
    opts.on( '-j', '--json', 'Print hater list as json' ) do
      options[:format] = :as_json
    end
    opts.on( '-c', '--csv', 'Print hater list as CSV' ) do
      options[:format] = :as_csv
    end
    begin
      opts.parse!
      # --verbose only changes the plain-text format.
      if options[:format] == :as_text && options[:verbose]
        options[:format] = :as_text_verbose
      end
    rescue
      # Any parse error (or -h's deliberate raise) prints usage and exits.
      puts opts
      exit
    end
  end

  BASE_URL = 'http://gamerfury.tumblr.com'
  hater_finder = ScumbagHaterFinder.new(BASE_URL)

  hater_parser = ->(desc) {
    # Quick & dirty XML sanitization. Must be gsub, not sub: sub escapes only
    # the FIRST bare ampersand, and any later raw '&' makes REXML reject the
    # whole description, silently dropping that post.
    desc = desc.to_s.gsub('&', '&amp;')
    # Collapse a truncated leading tag fragment ("<foo ... <" becomes "<").
    desc = desc.sub(/<[^>]*</, '<')
    # GamerFury usernames are inside <em> tags in post descriptions.
    # Wrap description in <root> tags so REXML doesn't throw a fit.
    doc = REXML::Document.new("<root>#{desc}</root>")
    ems = REXML::XPath.match(doc, '//em')
    hater = ems.last
    # Undo the escaping above for every ampersand, not just the first.
    (hater && hater.text.to_s.gsub('&amp;', '&')) || nil
  }

  haters = hater_finder.get_all(&hater_parser)
  output = ScumbagHaterPresenter.new(haters).send(options[:format])

  if options[:output]
    File.open(options[:output], 'w') do |f|
      f.write output + "\n"
    end
  else
    puts output
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment