Skip to content

Instantly share code, notes, and snippets.

@gnarg
Created October 17, 2012 16:44
Show Gist options
  • Save gnarg/3906644 to your computer and use it in GitHub Desktop.
Save gnarg/3906644 to your computer and use it in GitHub Desktop.
histogram coding challenge
# Builds a histogram of the second comma-separated field of every line,
# reading the file sequentially in the calling thread.
#
# @param file [String] path of the text file to read
# @return [Hash{String => Integer}] occurrences per field value; keys that
#   were never seen read as 0
def simple(file)
  histogram = Hash.new(0)
  # Stream the file line by line instead of loading it into memory at once.
  File.foreach(file) do |line|
    # Strip the line terminator first: otherwise it becomes part of the key
    # and a final line without a trailing newline is counted separately.
    value = line.chomp.split(',', 3)[1]
    # A line without a comma has no second field to count.
    next if value.nil?

    histogram[value] += 1
  end
  histogram
end
# Builds the histogram by splitting the file into byte ranges and parsing
# each range with a Chunk in its own thread.
#
# @param file [String] path of the text file to read
# @param thread_count [Integer] number of byte ranges / worker threads
# @return [Hash] merged per-chunk counts; keys that were never seen read as 0
# @raise [ZeroDivisionError] when thread_count is 0
def threading(file, thread_count=10)
  total = File.size(file)
  chunk_size = total / thread_count

  workers = []
  thread_count.times do |i|
    offset = i * chunk_size
    # Integer division truncates, so the last range also takes the leftover
    # bytes; otherwise the tail of the file would never be parsed.
    length = (i == thread_count - 1) ? total - offset : chunk_size
    # Files smaller than thread_count yield empty ranges with nothing to parse.
    next if length.zero?

    workers << Thread.new { Chunk.new(file, length, offset).parse }
  end

  histogram = Hash.new(0)
  workers.each do |worker|
    # Thread#value joins the worker and re-raises any exception it died
    # with, so a failing chunk surfaces here instead of leaving us waiting.
    histogram.merge!(worker.value) { |_, a, b| a + b }
  end
  histogram
end
# Builds the histogram by splitting the file into byte ranges and parsing
# each range with a Chunk in a forked child process. Children hand their
# counts back through a Rinda tuple space served over DRb on a fixed local port.
#
# @param file [String] path of the text file to read
# @param fork_count [Integer] number of byte ranges / child processes
# @return [Hash] merged per-chunk counts; keys that were never seen read as 0
# @raise [ZeroDivisionError] when fork_count is 0
def forking(file, fork_count=8)
  require 'rinda/tuplespace'
  pids = []
  uri = 'druby://localhost:12345'
  server = Rinda::TupleSpace.new
  service = DRb.start_service(uri, server)

  total = File.size(file)
  chunk_size = total / fork_count
  fork_count.times do |i|
    offset = i * chunk_size
    # Integer division truncates, so the last range also takes the leftover
    # bytes; otherwise the tail of the file would never be parsed.
    length = (i == fork_count - 1) ? total - offset : chunk_size
    # Files smaller than fork_count yield empty ranges with nothing to parse.
    next if length.zero?

    pid = Process.fork do
      chunk = Chunk.new(file, length, offset)
      DRb.start_service
      client = DRbObject.new_with_uri(uri)
      client.write(['results', chunk.parse])
    end
    pids << pid
  end

  histogram = Hash.new(0)
  # Exactly one 'results' tuple is expected per child that was started.
  # NOTE(review): a child that dies before writing its tuple leaves this
  # loop blocked in take; there is no timeout.
  pids.size.times do
    results = server.take(['results', nil])[1]
    histogram.merge!(results) { |_, a, b| a + b }
  end
  histogram
ensure
  # Reap the children so they do not linger as zombies, then release the
  # fixed port so a later call in the same process can bind it again.
  (pids || []).each { |pid| Process.wait(pid) }
  service.stop_service if service
end
# A byte range of a file made of "name,value\n" records that can be tallied
# independently of the rest of the file.
#
# A range may begin in the middle of a record. When it begins after that
# record's comma, the bytes just before the range are re-read so the record
# can still be counted; a record whose newline lies beyond the range is left
# for the chunk that contains the newline.
class Chunk
  # How many bytes before the range are re-read when it begins inside a
  # value. The record is recovered only if its comma lies within these bytes.
  LOOK_BACK_BYTES = 4

  # Reads the byte range and, if it starts inside a value, extends it backwards.
  #
  # @param file [String] path of the file to read
  # @param size [Integer] number of bytes in the range
  # @param offset [Integer] byte position in the file where the range starts
  def initialize(file, size, offset)
    @file = file
    @offset = offset
    # The block form closes the handle as soon as the bytes are read.
    # read returns nil when the range starts at or past the end of the file.
    @string, @at_eof = File.open(file) do |handle|
      handle.pos = offset
      [handle.read(size) || '', handle.eof?]
    end
    look_back(LOOK_BACK_BYTES) if starts_in_value?
  end

  # Counts the text between each record's first comma and its newline.
  #
  # @return [Hash] occurrences per value; keys that were never seen read as 0
  def parse
    results = Hash.new(0)
    offset = 0
    while (pos = @string.index(',', offset))
      eol = @string.index("\n", pos)
      if eol
        results[@string[(pos + 1)...eol]] += 1
        offset = eol + 1
      else
        # No newline follows, so none follows any later comma either. The
        # record is complete only when the range runs to the end of the file
        # (an unterminated last line); otherwise its newline is in a later range.
        results[@string[(pos + 1)..-1]] += 1 if @at_eof
        break
      end
    end
    results
  end

  # Prepends up to +bytes+ bytes that precede the range in the file. The
  # request is clamped at the start of the file, and anything up to and
  # including the last newline in those bytes is discarded because it belongs
  # to an earlier record, which would otherwise be counted a second time.
  #
  # @param bytes [Integer] maximum number of preceding bytes to re-read
  # @return [String] the (possibly extended) text of the range
  def look_back(bytes)
    bytes = [bytes, @offset].min
    return @string if bytes <= 0

    prefix = File.open(@file) do |handle|
      handle.pos = @offset - bytes
      handle.read(bytes) || ''
    end
    last_break = prefix.rindex("\n")
    prefix = prefix[(last_break + 1)..-1] if last_break
    # Track the new start so a repeated call continues from the right place.
    @offset -= prefix.bytesize
    @string = prefix + @string
  end

  private

  # True when the first record boundary in the range (a newline, or the end
  # of the file) comes before any comma, i.e. the comma of the record the
  # range starts in lies before the range.
  def starts_in_value?
    return false if @string.empty?

    line_end = @string.index("\n") || (@at_eof ? @string.length : nil)
    return false unless line_end

    comma = @string.index(',')
    comma.nil? || line_end < comma
  end
end
# Driver: prints the histogram of the file named by the first command-line
# argument. The commented-out calls select the other strategies; the trailing
# notes are the author's measurements per interpreter (presumably elapsed
# time as m:ss).
#
# p simple(ARGV[0]) # 1.9.3: 1:38, jruby: 1:23, jruby --fast -J-Xmx1750m -J-Xms1750m: 1:07
# p threading(ARGV[0]) # 1.9.3: 2:03, jruby --fast -J-Xmx1750m -J-Xms1750m: 0:15
input_path = ARGV.first
p forking(input_path) # 1.9.3: 0:34
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment