Skip to content

Instantly share code, notes, and snippets.

@havenwood
Created February 14, 2019 17:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save havenwood/88405ae46a14faab9bdffb3fdb50465c to your computer and use it in GitHub Desktop.
Save havenwood/88405ae46a14faab9bdffb3fdb50465c to your computer and use it in GitHub Desktop.
# frozen_string_literal: true
require 'digest'
require 'json'
require 'logger'
require 'pathname'
require 'pp'
require 'singleton'
require 'uri'
require 'addressable'
require 'http'
# Singleton JSON-over-HTTP client with an on-disk response cache for GET
# requests, back-off and retry for a few transient HTTP statuses, and an
# optional request throttle (used only when @throttle has been assigned;
# nothing in this class assigns it).
class JSONClient
  # Raised by JSONClient#get when the server answers with a non-success status.
  class HTTPError < RuntimeError
    # @return [Object] the response that triggered the error
    attr_reader :response

    # @param response [Object] must respond to #code and #uri
    def initialize(response)
      @response = response
      super(built_error_message)
    end

    private

    def built_error_message
      "Received HTTP #{response.code} for URL:\n#{response.uri}"
    end
  end

  include Singleton

  # Root directory of the JSON cache, resolved relative to this file.
  CACHE_DIR = Pathname.new "#{__dir__}/../jsoncache"

  # Limits callers to +max_request+ calls per +time_period+ seconds and
  # reports usage counters through Resque.redis.
  class Throttle
    attr_reader :max_request, :time_period, :request_count, :rate_limit_start

    # @param max_request [Integer] calls allowed per window
    # @param time_period [Numeric] window length in seconds
    def initialize(max_request, time_period)
      @max_request = max_request
      @time_period = time_period
      @request_count = 0
      @rate_limit_start = Time.now.to_f
    end

    # Runs the given block, first sleeping out the rest of the current window
    # when the window's request budget is already used up.
    #
    # @return [Object] whatever the block returns
    def limit
      rate_limit if time_elapsed < time_period && request_count >= max_request
      yield
    ensure
      # Count the call even when the block raised.
      housekeeping
    end

    private

    # Sleeps until the current window ends and records how long was waited
    # (stored in hundredths of a second).
    def rate_limit
      # Clamp at zero: the window may expire between the caller's check and
      # this computation, and Kernel#sleep rejects negative durations.
      wasted_time = [time_period - time_elapsed, 0].max.round(2)
      Resque.redis.incr('json_client:rate_limited_reqs')
      Resque.redis.incrby('json_client:rate_limit_waste', (wasted_time * 100).to_i)
      sleep(wasted_time)
    end

    def time_elapsed
      Time.now.to_f - rate_limit_start
    end

    # Starts a new window once the current one has expired (flushing its
    # request count to Redis), then counts the call that just happened.
    def housekeeping
      if time_elapsed > time_period
        Resque.redis.incr('json_client:requests_10s')
        Resque.redis.incrby('json_client:requests_per_10', request_count)
        @rate_limit_start = Time.now.to_f
        @request_count = 0
      end
      @request_count += 1
    end
  end

  # @return [Logger] lazily created logger writing to STDOUT
  def logger
    @logger ||= Logger.new(STDOUT)
  end

  # @return [Object] the HTTP entry point used for every request
  def http
    HTTP
  end

  # Location of the cache file for a request. The file name is the SHA-256 of
  # the inspected [url, sorted params, method] triple, sharded into two
  # directory levels taken from the first four hex digits.
  #
  # @param url [String]
  # @param params [Hash]
  # @param method [String]
  # @return [Pathname]
  def path_to_cached_json(url, params, method = 'GET')
    hash = Digest::SHA256.hexdigest([url, params.sort, method].inspect)
    CACHE_DIR / "#{hash[0..1]}/#{hash[2..3]}/#{hash}.json"
  end

  # Fetches +url+ and returns the parsed JSON body, reading from and writing
  # to the on-disk cache when +cache+ is true.
  #
  # @param url [String, URI] URL to fetch; +params+ are merged into its query
  # @param params [Hash] extra query parameters (override same-named ones in +url+)
  # @param cache [Boolean] whether to use the on-disk cache
  # @param retries [Integer] how many times to retry after HTTP 522, 429 or 520
  # @return [Object, nil] parsed JSON, or nil once the retries are exhausted
  # @raise [HTTPError] for any other non-success status
  # @raise [JSON::ParserError] when a successful response body is not JSON
  def get(url, params: {}, cache: true, retries: 3)
    # XXX: If we get an error (404, 500, etc.), should we remember it so that we don't re-request the URL every time the importer runs?
    pathname = path_to_cached_json(url, params) if cache

    if pathname&.exist?
      logger.info("GET #{url} (CACHED)")
      begin
        return JSON.parse(pathname.read)
      rescue JSON::ParserError
        # A corrupt cache entry is treated as a miss; the successful fetch
        # below overwrites it.
        logger.warn("Ignoring unparsable cache file #{pathname}")
      end
    end

    fetch_json(url, params, pathname, retries)
  end

  # Performs a non-cached request and returns the JSON response converted
  # with #to_ostruct (expected to be provided elsewhere; it is not defined in
  # this file).
  #
  # @param verb [Symbol] one of :get, :post, :put, :delete
  # @param url [String] absolute http(s) URL, or a path appended to #api_url
  #   when this object responds to it
  # @param opts [Hash, nil] sent as `params:` for GET/DELETE and `form:` for POST/PUT
  # @return [Object] the parsed response
  # @raise [ArgumentError] for an unsupported verb
  # @raise [RuntimeError] when the response is not application/json
  def request(verb, url, opts = nil)
    url = "#{api_url}#{url}" if respond_to?(:api_url) && !url[%r{\Ahttps?://}i]

    # Filter out passwords
    display_opts = (opts || {}).map { |key, value| [key, key.to_s['password'] ? '[PASSWORD FILTERED]' : value] }.to_h

    # Display debug information
    logger.info "[JSONClient] HTTP #{verb} #{url.inspect} #{display_opts.inspect}"

    response =
      case verb
      when :get then http.get(url, params: opts)
      when :post then http.post(url, form: opts)
      when :put then http.put(url, form: opts)
      when :delete then http.delete(url, params: opts)
      else raise ArgumentError, "Unsupported HTTP verb: #{verb.inspect}"
      end

    mime_type = response.content_type.mime_type
    unless mime_type == 'application/json'
      raise "Didn't receive a JSON response from the server (got a #{mime_type} instead). Body of response:\n#{response}"
    end

    result = JSON.parse(response.to_s).to_ostruct
    logger.info result.pretty_inspect
    result
  end

  # post/delete/put are thin wrappers around #request; #get is the cached
  # variant defined above.
  %i[post delete put].each do |verb|
    define_method(verb) do |url, opts = nil|
      request(verb, url, opts)
    end
  end

  private

  # Issues the GET (through @throttle when one is set), retrying after a
  # pause on HTTP 522, 429 and 520, and stores a successful body at +pathname+.
  def fetch_json(url, params, pathname, retries)
    uri = URI(url)
    existing_params = URI.decode_www_form(uri.query || '').to_h
    # Stringify keys so a symbol-keyed param overrides the same-named
    # parameter already present in the URL instead of duplicating it.
    extra_params = params.map { |key, value| [key.to_s, value] }.to_h
    uri.query = URI.encode_www_form(existing_params.merge(extra_params))

    begin
      logger.info("GET #{url}")
      response = @throttle ? @throttle.limit { http.get(uri) } : http.get(uri)

      # Check the status before parsing: error pages are often not JSON and
      # must still reach the retry handling below.
      raise HTTPError, response unless response.status.success?

      body = response.to_s
      json = JSON.parse(body)
      write_cache(pathname, body) if pathname
      json
    rescue HTTPError => e
      report_http_error(e)

      case e.response.code
      when 522
        puts '522! .. Sleeping for 5 seconds.'
        sleep 5
      when 429
        puts 'Rate limited! Sleeping 10 seconds.'
        sleep 10
      when 520
        warn 'Unknown remote server error! Sleeping 30 seconds'
        sleep 30
      else
        raise
      end

      retries -= 1
      retry if retries >= 0
      nil # retries exhausted
    end
  end

  # Dumps the failed response to stderr/stdout for debugging.
  def report_http_error(error)
    warn "TMDB ERROR! #{error.inspect}"
    puts '== Headers: ====================================='
    puts error.response.headers
    puts '== Body: ========================================'
    puts error.response.body
  end

  # Writes +body+ to +pathname+, creating parent directories as needed.
  def write_cache(pathname, body)
    pathname.dirname.mkpath
    # dup so a frozen or shared body string is not mutated by force_encoding
    pathname.write(body.dup.force_encoding('UTF-8'))
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment