Last active
May 24, 2022 12:00
-
-
Save stenlarsson/60b1e4e99416738b41ee30e7ba294214 to your computer and use it in GitHub Desktop.
arrow_test_csv.rb creates a CSV with random data used by arrow_memory_leak.rb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import gc | |
import resource | |
from pyarrow import csv | |
import sys | |
def print_stats():
    """Run a garbage collection, then print the tracked-object count and peak RSS.

    The object count is the number of objects currently tracked by the cyclic
    garbage collector. The memory figure is the process's *peak* resident set
    size, so it can only stay flat or grow between calls.
    """
    gc.collect()
    max_rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
    # ru_maxrss is reported in bytes on macOS but in kilobytes on Linux, so
    # normalise to bytes before converting to MB.
    bytes_per_unit = 1 if sys.platform == 'darwin' else 1024
    peak_mb = max_rss * bytes_per_unit / (1024 * 1024)
    print(f'{len(gc.get_objects())} objects, {peak_mb} MB')
def main():
    """Print baseline stats, then parse arrow_test.csv ten times, printing stats after each parse."""
    print_stats()
    remaining = 10
    while remaining > 0:
        # The parsed table is not kept; it is dropped as soon as read_csv returns.
        csv.read_csv('arrow_test.csv')
        print_stats()
        remaining -= 1


if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'arrow' | |
require 'get_process_mem' | |
# Runs a full GC, then prints the number of live Ruby objects and the
# process memory (in MB) as reported by GetProcessMem.
#
# ObjectSpace.count_objects[:TOTAL] counts every heap slot, including the
# free ones, so the free slots are subtracted to report live objects only.
def print_stats
  GC.start
  slot_counts = ObjectSpace.count_objects
  live_objects = slot_counts[:TOTAL] - slot_counts[:FREE]
  puts "#{live_objects} objects, #{GetProcessMem.new.mb} MB"
end
# Baseline measurement before any CSV has been read.
print_stats

# Read the same CSV ten times through a memory-mapped stream, discarding the
# resulting table each time, and print stats after every pass.
(1..10).each do |_pass|
  Arrow::MemoryMappedInputStream.open('arrow_test.csv') { |input| Arrow::CSVReader.new(input).read }
  print_stats
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'arrow' | |
# Shared state read by the rest of the script: the "divide" compute function
# and Arrow's default memory pool (used by print_stats for allocation figures).
@divide_function = Arrow::Function.find('divide')
@memory_pool = Arrow.default_memory_pool

backend_name = @memory_pool.backend_name
puts "backend_name: #{backend_name}"
# Runs a full GC, then prints how many whole MB the Arrow memory pool in
# @memory_pool currently has allocated and the number of live Ruby objects.
#
# ObjectSpace.count_objects[:TOTAL] counts every heap slot, including the
# free ones, so the free slots are subtracted to report live objects only.
def print_stats
  GC.start
  slot_counts = ObjectSpace.count_objects
  live_objects = slot_counts[:TOTAL] - slot_counts[:FREE]
  puts "#{@memory_pool.bytes_allocated / 1024**2} MB allocated, #{live_objects} Ruby objects"
end
# Baseline measurement before any work is done.
print_stats

# Ten passes: read the CSV into a table, run the "divide" function over each
# of its first 100 columns (dividing by 1e6), discard the results, and print
# stats after every pass.
10.times do
  Arrow::MemoryMappedInputStream.open('arrow_test.csv') do |input|
    table = Arrow::CSVReader.new(input).read
    # The column is looked up by index inside the loop; the earlier
    # `column = table.columns[0]` assignment was overwritten before any use
    # and has been removed.
    100.times do |column_index|
      column = table[column_index]
      @divide_function.execute([column, 1e6])
    end
  end
  print_stats
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'csv' | |
# Write arrow_test.csv: 100,000 rows, each holding 100 values produced by
# Kernel#rand, then report the resulting file size rounded to whole MB.
CSV.open('arrow_test.csv', 'wb') do |csv|
  100_000.times { csv << Array.new(100) { rand } }
end

size_in_mb = (File.size('arrow_test.csv') / (1024 * 1024).to_f).round
puts "File size: #{size_in_mb} MB"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment