Skip to content

Instantly share code, notes, and snippets.

@liaden
Created November 25, 2019 16:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save liaden/a9c1f0a0f44f5ef5dd1df3aec9085d36 to your computer and use it in GitHub Desktop.
Save liaden/a9c1f0a0f44f5ef5dd1df3aec9085d36 to your computer and use it in GitHub Desktop.
postprocess rbspy raw.gz data
#!/usr/bin/ruby
require 'zlib'
require 'stringio'
require 'oj'
# Echo the raw command-line arguments, then switch into the directory named
# by the first argument so the relative paths used below resolve there.
p ARGV
Dir.chdir(ARGV.first)
# Decide whether one JSON-encoded sample line should be dropped from the
# merged output.
#
# A line is dropped (true) when it carries no usable trace, or when its first
# three frames are '<c function>', 'worker_loop' and 'spawn_missing_workers'
# in that order.
#
# @param line [String] one JSON document
# @return [Boolean] true when the line should be skipped
def idling_unicorn?(line)
  json = Oj.load(line)
  # Anything that is not a JSON object with a 'trace' array cannot be
  # inspected; treat it like an empty parse result instead of letting
  # nil.dig raise NoMethodError and abort the calling thread.
  return true unless json.is_a?(Hash)

  trace = json['trace']
  return true unless trace.is_a?(Array)

  trace.dig(0, 'name') == '<c function>' &&
    trace.dig(1, 'name') == 'worker_loop' &&
    trace.dig(2, 'name') == 'spawn_missing_workers'
end
# Yield every line of a gzip-compressed file except the first one.
#
# @param filename [String] path of the gzip file to read
# @yieldparam line [String] one line, including its trailing newline
def each_trace(filename, &block)
  Zlib::GzipReader.open(filename) do |reader|
    reader.gets # consume and discard the first line
    reader.each_line(&block)
  end
end
# Run the given block once per '*.gz' file in the current directory (except
# 'merged.gz'), one thread per file, at most +batchsize+ files at a time.
# Each batch is joined before the next one starts.
#
# @param batchsize [Integer] number of files handled concurrently
# @yieldparam filename [String] name of one input file
def batch_process(batchsize)
  archives = Dir['*.gz'].reject { |name| name == 'merged.gz' }
  archives.each_slice(batchsize) do |batch|
    puts "Processing #{batch}"
    workers = batch.map do |filename|
      Thread.new(filename) { |name| yield name }
    end
    workers.each(&:join)
  end
end
# Merge the non-idle samples of every input file into merged.gz.
#
# Remove a stale output first. merged.gz is written below as a regular file,
# so the guard has to test for a file: Dir.exist? is only true for
# directories and therefore never triggered the cleanup.
File.delete('merged.gz') if File.file?('merged.gz')

# The block form of GzipWriter.open closes the writer when the block exits,
# so no explicit close is needed.
Zlib::GzipWriter.open('merged.gz') do |writer|
  # Header line; each_trace skips the first line of every input file.
  writer.write("rbspy01\n")
  # Serializes writes from the worker threads started by batch_process.
  semaphore = Mutex.new
  batch_process(8) do |filename|
    data = []
    begin
      each_trace(filename) do |line|
        data << line unless idling_unicorn?(line)
      end
    rescue Zlib::GzipFile::Error => e
      puts "\n======\nError processing #{filename}: #{e.message}\n#{e.backtrace}"
      # Drop whatever was collected from an unreadable file.
      data = []
    end
    data = data.join('')
    semaphore.synchronize { writer.write(data) } unless data.empty?
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment