Skip to content

Instantly share code, notes, and snippets.

@grymoire7
Created February 24, 2023 19:34
Show Gist options
  • Save grymoire7/d9e898e3d015c4f54ac4c48f7c2e763e to your computer and use it in GitHub Desktop.
Save grymoire7/d9e898e3d015c4f54ac4c48f7c2e763e to your computer and use it in GitHub Desktop.
Download sematext logs
#!/usr/bin/env ruby
# Title: Download Sematext log files from S3
#
# NOTE(review): besides the stdlib requires below, this script references
# Aws::S3::Resource, S3Config, #beginning_of_hour, Integer#hour and
# String#green without loading them. Presumably it is meant to run inside an
# environment that already provides them (AWS SDK, ActiveSupport, a colour
# gem, the application's S3Config) -- confirm before running it standalone.
require 'date'
require 'fileutils' # FileUtils.mkdir_p is used by sematext_download
require 'time'      # Time.parse is used by sematext_download

# Download window (inclusive). The end date must not precede the start date,
# otherwise the hourly range walked by sematext_download is empty and nothing
# is downloaded. The end date previously read '2021-12-15', i.e. a year before
# the start date.
start_date = '2022-12-14'
end_date = '2022-12-15'

# Local directory that receives the <year>/<month>/<day>/<hour> sub-tree.
# No trailing slash, so joined paths do not contain '//'.
target_dir = File.join(Dir.home, 'Downloads', 'logs')

# S3 bucket holding the archived logs, and the key prefix inside that bucket.
BUCKET_NAME = 'benchprep-sematext-production'.freeze
SEMATEXT_PREFIX = 'sematext_52f4930f'.freeze

# Usage:
#   sematext_download(start_datetime: DateTime.parse(start_date),
#                     end_datetime: DateTime.parse(end_date),
#                     target_dir: target_dir)
# Downloads log objects from the S3 bucket BUCKET_NAME for every hour between
# +start_datetime+ and +end_datetime+ (both truncated to the start of their
# hour, both ends inclusive).
#
# For each hour the objects listed under "SEMATEXT_PREFIX/<year>/<month>/<day>/<hour>"
# (UTC, numbers not zero-padded) are examined. An object is downloaded to
# "<target_dir>/<year>/<month>/<day>/<hour>/<basename>" when the part of its
# basename before the first '.' parses as a time inside
# [start_datetime, end_datetime]. Objects whose name does not parse as a time
# are skipped with a warning instead of aborting the whole run.
#
# @param start_datetime [#beginning_of_hour] first instant of the window
# @param end_datetime [#beginning_of_hour] last instant of the window
# @param target_dir [String] local root directory for the downloaded files
# @return [void]
#
# NOTE(review): the listing prefix has no trailing '/', so the prefix for
# hour 1 also matches keys under hours 10..19 (and 2 matches 20..23) when keys
# are not zero-padded. Whether that is wanted depends on the bucket's real key
# layout -- confirm before tightening it.
# NOTE(review): Time.parse interprets a name without zone information in the
# local time zone, while the hour folders are computed in UTC -- confirm the
# file-name format.
def sematext_download(start_datetime:, end_datetime:, target_dir: './')
  start_seconds = start_datetime.beginning_of_hour.to_i
  end_seconds = end_datetime.beginning_of_hour.to_i

  # An inverted window yields an empty range; say so instead of silently
  # doing nothing.
  if end_seconds < start_seconds
    warn "sematext_download: end_datetime (#{end_datetime}) is before " \
         "start_datetime (#{start_datetime}); nothing to download"
    return
  end

  bucket = Aws::S3::Resource.new(region: S3Config.region).bucket(BUCKET_NAME)

  # Step with a plain Integer number of seconds so Range#step never has to
  # coerce a duration object.
  (start_seconds..end_seconds).step(1.hour.to_i).each do |seconds|
    hour_start = Time.at(seconds).utc
    ymdh = "#{hour_start.year}/#{hour_start.month}/#{hour_start.day}/#{hour_start.hour}"
    prefix = "#{SEMATEXT_PREFIX}/#{ymdh}"
    # File.join avoids the doubled separator that interpolation produced
    # whenever target_dir ended in '/' (including the default './').
    download_dir = File.join(target_dir, ymdh)
    FileUtils.mkdir_p(download_dir)

    bucket.objects(prefix: prefix).each do |summary| # summary is an Aws::S3::ObjectSummary
      log_file_name = File.basename(summary.key)
      log_file_timestamp = sematext_log_timestamp(log_file_name)

      if log_file_timestamp.nil?
        warn "skipping #{summary.key}: file name does not start with a timestamp"
        next
      end
      next unless log_file_timestamp.between?(start_datetime, end_datetime)

      puts "downloading #{log_file_name}".green
      source = bucket.object(summary.key) # source is an Aws::S3::Object
      source.get(response_target: File.join(download_dir, log_file_name))
      # system("lzf -d #{download_dir}/#{log_file_name}")
    end
  end
end

# Parses the timestamp encoded in a log file name (the text before the first
# '.'). Returns nil when that text is missing or is not a parseable time.
def sematext_log_timestamp(log_file_name)
  Time.parse(log_file_name.split('.')[0])
rescue ArgumentError, TypeError
  nil
end
# Announce the configured window, run the download, then report completion.
puts "Downloading Sematext logs #{start_date}-#{end_date}...."
window_start = DateTime.parse(start_date)
window_end = DateTime.parse(end_date)
sematext_download(
  start_datetime: window_start,
  end_datetime: window_end,
  target_dir: target_dir
)
puts 'Done.'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment