patio11/slurper.rb

## slurper.rb
require 'rubygems'
require 'httparty'
require 'fileutils'
require 'json'

# Downloads the items listed under a Hacker News user's "submitted" key and
# stores each one as JSON in comments/<username>/<item id>. Items that already
# have a file on disk are counted as cached and are not requested again, so an
# interrupted run can be restarted without refetching.
#
# Usage: ruby slurper.rb [username] [max_to_fetch]
#   username     - account to fetch; defaults to "patio11"
#   max_to_fetch - upper bound on how many ids (taken from the front of the
#                  "submitted" list) are considered; every id when omitted
USERNAME = ARGV[0] || "patio11"
MAX_TO_FETCH = ARGV[1]

puts "Username: #{USERNAME} max to fetch: #{MAX_TO_FETCH || "all"}"

user_url = "https://hacker-news.firebaseio.com/v0/user/#{USERNAME}.json"
comment_url = "https://hacker-news.firebaseio.com/v0/item/$ID.json"

user_results = HTTParty.get(user_url).parsed_response

# Stop with a readable message instead of a NoMethodError when the response is
# not a JSON object (presumably what an unknown username yields -- confirm
# against the API).
abort "Could not load user #{USERNAME}" unless user_results.is_a?(Hash)

# A user record without a "submitted" key is treated as having no items.
comment_ids = user_results["submitted"] || []

puts comment_ids.inspect

comments_dir = "comments/#{USERNAME}"
FileUtils.mkdir_p comments_dir

# Clamp the requested amount to 0..comment_ids.size. Array#first is used
# instead of a 0..(n - 1) range because that range turns into 0..-1 for n == 0
# and would select every id rather than none.
to_fetch = MAX_TO_FETCH ? MAX_TO_FETCH.to_i : comment_ids.size
to_fetch = [[to_fetch, 0].max, comment_ids.size].min

sample_ids = comment_ids.first(to_fetch)

count = 0
cached = 0

# Print progress about once per 0.1% of the sample, and at least once per
# downloaded item for small samples.
increment = [(sample_ids.size / 1000.0).round, 1].max

sample_ids.each do |id|
  path = "#{comments_dir}/#{id}"

  if File.exist?(path)
    cached += 1
    next
  end

  response = HTTParty.get(comment_url.sub("$ID", id.to_s)).parsed_response #rescue nil
  sleep 0.2 # pause after every request, whether or not it returned data
  next unless response

  count += 1
  # Block form closes the file even if serialising or writing raises.
  File.open(path, "w") { |f| f.write(response.to_json) }
  puts "Downloaded #{count} comments of #{comment_ids.size}. Cached: #{cached}" if count % increment == 0
end
	require 'rubygems'
	require 'httparty'
	require 'fileutils'
	require 'json'

	# Downloads the items listed under a Hacker News user's "submitted" key and
	# stores each one as JSON in comments/<username>/<item id>. Items that already
	# have a file on disk are counted as cached and are not requested again, so an
	# interrupted run can be restarted without refetching.
	#
	# Usage: ruby slurper.rb [username] [max_to_fetch]
	#   username     - account to fetch; defaults to "patio11"
	#   max_to_fetch - upper bound on how many ids (taken from the front of the
	#                  "submitted" list) are considered; every id when omitted
	USERNAME = ARGV[0] || "patio11"
	MAX_TO_FETCH = ARGV[1]

	puts "Username: #{USERNAME} max to fetch: #{MAX_TO_FETCH || "all"}"

	user_url = "https://hacker-news.firebaseio.com/v0/user/#{USERNAME}.json"
	comment_url = "https://hacker-news.firebaseio.com/v0/item/$ID.json"

	user_results = HTTParty.get(user_url).parsed_response

	# Stop with a readable message instead of a NoMethodError when the response is
	# not a JSON object (presumably what an unknown username yields -- confirm
	# against the API).
	abort "Could not load user #{USERNAME}" unless user_results.is_a?(Hash)

	# A user record without a "submitted" key is treated as having no items.
	comment_ids = user_results["submitted"] || []

	puts comment_ids.inspect

	comments_dir = "comments/#{USERNAME}"
	FileUtils.mkdir_p comments_dir

	# Clamp the requested amount to 0..comment_ids.size. Array#first is used
	# instead of a 0..(n - 1) range because that range turns into 0..-1 for n == 0
	# and would select every id rather than none.
	to_fetch = MAX_TO_FETCH ? MAX_TO_FETCH.to_i : comment_ids.size
	to_fetch = [[to_fetch, 0].max, comment_ids.size].min

	sample_ids = comment_ids.first(to_fetch)

	count = 0
	cached = 0

	# Print progress about once per 0.1% of the sample, and at least once per
	# downloaded item for small samples.
	increment = [(sample_ids.size / 1000.0).round, 1].max

	sample_ids.each do |id|
	  path = "#{comments_dir}/#{id}"

	  if File.exist?(path)
	    cached += 1
	    next
	  end

	  response = HTTParty.get(comment_url.sub("$ID", id.to_s)).parsed_response #rescue nil
	  sleep 0.2 # pause after every request, whether or not it returned data
	  next unless response

	  count += 1
	  # Block form closes the file even if serialising or writing raises.
	  File.open(path, "w") { |f| f.write(response.to_json) }
	  puts "Downloaded #{count} comments of #{comment_ids.size}. Cached: #{cached}" if count % increment == 0
	end