grodowski/memory_profiling_scripts

## memory_profiling_scripts
http://blog.skylight.io/hunting-for-leaks-in-ruby/
http://www.be9.io/2015/09/21/memory-leak/
http://samsaffron.com/archive/2015/03/31/debugging-memory-leaks-in-ruby

# parse_dump.sh
# Usage: parse_dump.sh INPUT_DUMP OUTPUT_FILE
#
# Parse a JSON dump from ObjectSpace.dump_all (one JSON document per line) and
# count how many objects share the same "file:line:type". Objects without a
# "file" entry are skipped. The result is written to OUTPUT_FILE as
# "<count> <file>:<line>:<type>" lines sorted by count, ascending (the largest
# counts come last).
#
# The dump is read from "$1": when this runs as a script, "$0" is the path of
# the script itself, not the dump.
if [ "$#" -ne 2 ]; then
  echo "Usage: parse_dump.sh INPUT_DUMP OUTPUT_FILE" >&2
  exit 1
fi

ruby -rjson -ne 'obj = JSON.parse($_).values_at("file","line","type"); puts obj.join(":") if obj.first ' < "$1" |
sort      |
uniq -c   |
sort -n > "$2"


# Compare two outputs of the previous bash script (parse_dump.sh)
# to count the difference in allocated objects
# between two memory snapshots.
# compare.rb processed_dump_1 processed_dump_2
#
# Each input line is read as "<count> <key>"; leading whitespace (as emitted
# by `uniq -c`) is ignored. For every key seen in either file the script
# prints "<count in dump 2 minus count in dump 1> @ <key>", largest growth
# first.
abort 'Usage: compare.rb processed_dump_1 processed_dump_2' if ARGV.length < 2

file1, file2 = ARGV[0], ARGV[1]
puts "comparing #{file1} and #{file2}"

# Per-key counts for both snapshots; a key absent from one snapshot counts as 0.
counts = Hash.new { |hash, key| hash[key] = { f1: 0, f2: 0 } }

{ f1: file1, f2: file2 }.each do |snapshot, dump_path|
  # File.foreach closes the file once every line has been read.
  File.foreach(dump_path) do |line|
    # Limit of 2: only the first run of whitespace separates the count from
    # the key, so keys that contain spaces (e.g. file paths) stay intact.
    num, key = line.split(' ', 2)
    key = key.to_s.strip
    next if key.empty? # blank line, or a line with a count but no key

    counts[key][snapshot] = num.to_i
  end
end

counts
  .map { |key, count| [count[:f2] - count[:f1], key] }
  .sort_by { |leaked, key| [-leaked, key] } # ties ordered by key so output is deterministic
  .each { |leaked, key| puts "#{leaked} @ #{key}" }


# More sophisticated algorithm to compare
# three JSON dumps from 'objspace' library and search for leaks.
# Adapted from:
# http://blog.skylight.io/hunting-for-leaks-in-ruby/
# compare3.rb json_1 json_2 json_3
#
# An object is reported as a potential leak when its "address" is absent from
# the first dump but present in both the second and the third dump. Results
# are grouped by type/file/line and printed with the most numerous group first.
require 'set'
require 'json'

if ARGV.length != 3
  puts "Usage: detect_leaks [FIRST.json] [SECOND.json] [THIRD.json]"
  exit 1
end

# Yields every line of the dump at +path+ that parses to a JSON object with an
# "address" entry. Blank lines are skipped instead of being handed to the JSON
# parser. File.foreach closes the file once iteration finishes.
def each_addressed_object(path)
  File.foreach(path, encoding: 'UTF-8') do |line|
    next if line.strip.empty?

    parsed = JSON.parse(line)
    yield parsed if parsed.is_a?(Hash) && parsed['address']
  end
end

# Collects the "address" of every object in the dump at +path+ into a Set.
def dump_addresses(path)
  addresses = Set.new
  each_addressed_object(path) { |object| addresses << object['address'] }
  addresses
end

# Sums +field+ over +objects+; an object without that field contributes 0.
def total_size(objects, field)
  objects.inject(0) { |sum, object| sum + (object[field] || 0) }
end

# Get the memory addresses from the first and the last dump
first_addrs = dump_addresses(ARGV[0])
third_addrs = dump_addresses(ARGV[2])

# Get a list of all items present in both the second and
# third dumps but not in the first.
diff = []
each_addressed_object(ARGV[1]) do |object|
  address = object['address']
  diff << object if !first_addrs.include?(address) && third_addrs.include?(address)
end

# Group items by [type, file, line], collect the sizes of each group and
# output information about each potential leak, largest object count first.
diff.group_by { |object| object.values_at('type', 'file', 'line') }
    .map do |site, objects|
      [site, objects.count, total_size(objects, 'bytesize'), total_size(objects, 'memsize')]
    end
    .sort_by { |_site, count, _bytesize, _memsize| -count }
    .each do |(type, file, line), count, bytesize, memsize|
      puts "Leaked #{count} #{type} objects of size #{bytesize}/#{memsize} at: #{file}:#{line}"
    end

# Also output total memory usage over all potential leaks
puts "\n\nTotal Size: #{total_size(diff, 'bytesize')}/#{total_size(diff, 'memsize')}"
	http://blog.skylight.io/hunting-for-leaks-in-ruby/
	http://www.be9.io/2015/09/21/memory-leak/
	http://samsaffron.com/archive/2015/03/31/debugging-memory-leaks-in-ruby

	# parse_dump.sh
	# Usage: parse_dump.sh INPUT_DUMP OUTPUT_FILE
	#
	# Parse a JSON dump from ObjectSpace.dump_all (one JSON document per line) and
	# count how many objects share the same "file:line:type". Objects without a
	# "file" entry are skipped. The result is written to OUTPUT_FILE as
	# "<count> <file>:<line>:<type>" lines sorted by count, ascending (the largest
	# counts come last).
	#
	# The dump is read from "$1": when this runs as a script, "$0" is the path of
	# the script itself, not the dump.
	if [ "$#" -ne 2 ]; then
		echo "Usage: parse_dump.sh INPUT_DUMP OUTPUT_FILE" >&2
		exit 1
	fi

	ruby -rjson -ne 'obj = JSON.parse($_).values_at("file","line","type"); puts obj.join(":") if obj.first ' < "$1" |
	sort |
	uniq -c |
	sort -n > "$2"




	# Compare two outputs of the previous bash script (parse_dump.sh)
	# to count the difference in allocated objects
	# between two memory snapshots.
	# compare.rb processed_dump_1 processed_dump_2
	#
	# Each input line is read as "<count> <key>"; leading whitespace (as emitted
	# by `uniq -c`) is ignored. For every key seen in either file the script
	# prints "<count in dump 2 minus count in dump 1> @ <key>", largest growth
	# first.
	abort 'Usage: compare.rb processed_dump_1 processed_dump_2' if ARGV.length < 2

	file1, file2 = ARGV[0], ARGV[1]
	puts "comparing #{file1} and #{file2}"

	# Per-key counts for both snapshots; a key absent from one snapshot counts as 0.
	counts = Hash.new { |hash, key| hash[key] = { f1: 0, f2: 0 } }

	{ f1: file1, f2: file2 }.each do |snapshot, dump_path|
	  # File.foreach closes the file once every line has been read.
	  File.foreach(dump_path) do |line|
	    # Limit of 2: only the first run of whitespace separates the count from
	    # the key, so keys that contain spaces (e.g. file paths) stay intact.
	    num, key = line.split(' ', 2)
	    key = key.to_s.strip
	    next if key.empty? # blank line, or a line with a count but no key

	    counts[key][snapshot] = num.to_i
	  end
	end

	counts
	  .map { |key, count| [count[:f2] - count[:f1], key] }
	  .sort_by { |leaked, key| [-leaked, key] } # ties ordered by key so output is deterministic
	  .each { |leaked, key| puts "#{leaked} @ #{key}" }




	# More sophisticated algorithm to compare
	# three JSON dumps from 'objspace' library and search for leaks.
	# Adapted from:
	# http://blog.skylight.io/hunting-for-leaks-in-ruby/
	# compare3.rb json_1 json_2 json_3
	#
	# An object is reported as a potential leak when its "address" is absent from
	# the first dump but present in both the second and the third dump. Results
	# are grouped by type/file/line and printed with the most numerous group first.
	require 'set'
	require 'json'

	if ARGV.length != 3
	  puts "Usage: detect_leaks [FIRST.json] [SECOND.json] [THIRD.json]"
	  exit 1
	end

	# Yields every line of the dump at +path+ that parses to a JSON object with an
	# "address" entry. Blank lines are skipped instead of being handed to the JSON
	# parser. File.foreach closes the file once iteration finishes.
	def each_addressed_object(path)
	  File.foreach(path, encoding: 'UTF-8') do |line|
	    next if line.strip.empty?

	    parsed = JSON.parse(line)
	    yield parsed if parsed.is_a?(Hash) && parsed['address']
	  end
	end

	# Collects the "address" of every object in the dump at +path+ into a Set.
	def dump_addresses(path)
	  addresses = Set.new
	  each_addressed_object(path) { |object| addresses << object['address'] }
	  addresses
	end

	# Sums +field+ over +objects+; an object without that field contributes 0.
	def total_size(objects, field)
	  objects.inject(0) { |sum, object| sum + (object[field] || 0) }
	end

	# Get the memory addresses from the first and the last dump
	first_addrs = dump_addresses(ARGV[0])
	third_addrs = dump_addresses(ARGV[2])

	# Get a list of all items present in both the second and
	# third dumps but not in the first.
	diff = []
	each_addressed_object(ARGV[1]) do |object|
	  address = object['address']
	  diff << object if !first_addrs.include?(address) && third_addrs.include?(address)
	end

	# Group items by [type, file, line], collect the sizes of each group and
	# output information about each potential leak, largest object count first.
	diff.group_by { |object| object.values_at('type', 'file', 'line') }
	    .map do |site, objects|
	      [site, objects.count, total_size(objects, 'bytesize'), total_size(objects, 'memsize')]
	    end
	    .sort_by { |_site, count, _bytesize, _memsize| -count }
	    .each do |(type, file, line), count, bytesize, memsize|
	      puts "Leaked #{count} #{type} objects of size #{bytesize}/#{memsize} at: #{file}:#{line}"
	    end

	# Also output total memory usage over all potential leaks
	puts "\n\nTotal Size: #{total_size(diff, 'bytesize')}/#{total_size(diff, 'memsize')}"