Skip to content

Instantly share code, notes, and snippets.

@jasoncodes
Created November 28, 2017 01:21
Show Gist options
  • Save jasoncodes/72eb6c4de66a4b2b436f7df03f031a49 to your computer and use it in GitHub Desktop.
Save jasoncodes/72eb6c4de66a4b2b436f7df03f031a49 to your computer and use it in GitHub Desktop.
require 'bundler'
Bundler.configure
require 'bundler/inline'
# Inline Gemfile: declares the gems this script uses so it can run without a
# separate Gemfile.
gemfile do
  # Fetch gems over TLS; a plain-HTTP source lets anyone on the network path
  # tamper with the downloaded gems.
  source 'https://rubygems.org'
  gem 'activesupport', require: 'active_support/all'
  gem 'aws-sdk-s3'
  gem 'parallel'
  gem 'curb'
  gem 'retryable'
end
require 'shellwords'
# Name of the Heroku app to archive, taken from the APP_NAME environment
# variable (KeyError if it is missing).
$app_name = ENV.fetch('APP_NAME')

# Papertrail API token, read from the app's Heroku config through the heroku
# CLI. The app name is shell-escaped because it is interpolated into a command.
$papertrail_token = `heroku config:get PAPERTRAIL_API_TOKEN -a #{Shellwords.escape $app_name}`.chomp

# Abort with an explanation rather than a bare "unhandled exception".
raise "heroku config:get PAPERTRAIL_API_TOKEN failed for #{$app_name} (#{$?})" unless $?.success?
raise "PAPERTRAIL_API_TOKEN is empty for #{$app_name}" unless $papertrail_token.present?
# S3 credentials come from the environment; a missing variable raises KeyError.
s3_credentials = {
  access_key_id: ENV.fetch('S3_ACCESS_KEY_ID'),
  secret_access_key: ENV.fetch('S3_SECRET_ACCESS_KEY')
}

# Shared S3 client (fixed to the us-east-1 region) and the destination bucket.
$s3 = Aws::S3::Client.new(**s3_credentials, region: 'us-east-1')
$s3_bucket = ENV.fetch('S3_BUCKET')
# Raised when the archive download is answered with HTTP 429, so that the
# retry wrapper around the download can try again.
RateLimitError = Class.new(RuntimeError)
# Copies one Papertrail archive into S3 unless the object is already there.
#
# Reads the globals $app_name, $papertrail_token, $s3 and $s3_bucket. The S3
# key is "<app>/<YYYY-MM-DD>/<archive>.tsv.gz", where <archive> is the date
# for a daily archive or "<date>-<HH>" for an hourly one.
#
# @param date [#strftime] day the archive covers
# @param hour [Integer, nil] hour of the day for an hourly archive, or nil for
#   the daily archive
# @return [Boolean] true when the object already exists in S3 or was uploaded,
#   false when the download is answered with HTTP 404
# @raise [RateLimitError] when the download is still answered with HTTP 429
#   once the retries are used up
# @raise [RuntimeError] on any other non-200 response, or when the payload
#   does not start with the gzip magic bytes
def archive(date, hour)
  date_name = date.strftime('%Y-%m-%d')
  # `hour` may be 0, which is truthy in Ruby, so midnight is still hourly.
  archive_name = hour ? format('%s-%02d', date_name, hour) : date_name
  s3_key = "#{$app_name}/#{date_name}/#{archive_name}.tsv.gz"
  log_prefix = "#{$app_name} #{archive_name}"

  puts "#{log_prefix}: Checking"
  already_archived = begin
    $s3.head_object(bucket: $s3_bucket, key: s3_key)
    true
  rescue Aws::S3::Errors::NotFound
    false
  end
  if already_archived
    puts "#{log_prefix}: Already archived"
    return true
  end

  puts "#{log_prefix}: Downloading"
  response = nil
  # Only rate limiting is retried; every other failure propagates at once.
  Retryable.retryable(tries: 5, sleep: 5, on: RateLimitError) do
    response = Curl.get("https://papertrailapp.com/api/v1/archives/#{archive_name}/download") do |http|
      http.follow_location = true
      http.headers['X-Papertrail-Token'] = $papertrail_token
    end
    if response.response_code == 429
      puts "#{log_prefix}: Rate limited"
      raise RateLimitError
    end
  end

  status = response.response_code
  if status == 404
    puts "#{log_prefix}: Not found"
    return false
  end

  # The body can be nil when nothing was received; `raise nil` would be a
  # TypeError, so normalise it and always report the status code.
  body = response.body.to_s
  raise "#{log_prefix}: HTTP #{status}: #{body}" unless status == 200

  # Compare raw bytes so the check does not depend on the body's encoding tag.
  raise 'expected gzip data' unless body.byteslice(0, 2).bytes == [0x1F, 0x8B]

  puts "#{log_prefix}: Uploading"
  $s3.put_object(bucket: $s3_bucket, key: s3_key, body: body)
  puts "#{log_prefix}: Done"
  true
end
# Walk backwards from today over the last 385 days, five days at a time.
newest_day = Date.today
oldest_day = Date.today - 385

Parallel.each(newest_day.downto(oldest_day), in_threads: 5) do |day|
  # Try the daily archive first (hour = nil); when that succeeds the day is done.
  next if archive(day, nil)

  # Otherwise fall back to hourly archives, stopping at the first hour that
  # is not available.
  24.times do |hour|
    break unless archive(day, hour)
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment