Skip to content

Instantly share code, notes, and snippets.

@jamster
Created June 2, 2010 18:28
Show Gist options
  • Save jamster/422772 to your computer and use it in GitHub Desktop.
Save jamster/422772 to your computer and use it in GitHub Desktop.
# Taken and modified from:
# http://t-a-w.blogspot.com/2010/05/very-simple-parallelization-with-ruby.html
require "rubygems"
require "active_support"
require 'thread'
require 'shellwords'
# Runs the given block and swallows anything it raises.
#
# Every exception class is rescued (Exception itself, not only StandardError),
# so a failing block never propagates to the caller. The exception's message
# is written once to standard output and once to STDERR.
#
# Returns the block's value when nothing is raised, otherwise nil.
def Exception.ignoring_exceptions
  yield
rescue Exception => error
  message = error.message
  puts message
  STDERR.puts message
end
# Reopens Enumerable so any collection can hand its elements to a small pool
# of worker threads.
module Enumerable
  # Yields every element of the receiver to the given block from a pool of
  # worker threads and waits until all of them have finished.
  #
  # Elements are wrapped in a one-element array before being queued so that
  # nil and false elements are still processed; a bare nil on the queue is
  # the "no more work" signal for a worker.
  #
  # Anything the block raises is reported and discarded through
  # Exception.ignoring_exceptions, so one failing element does not stop the
  # remaining work.
  #
  # With zero workers (0, a negative number or an empty Array) nothing is
  # processed.
  #
  # @param input [Integer, Array] number of worker threads; when an Array is
  #   given, its length is used as the number of workers
  # @yield [element] called once per element, on one of the worker threads
  # @return [Array<Thread>] the (already joined) worker threads
  def in_parallel_n(input)
    worker_count = input.is_a?(Array) ? input.length : [input, 0].max
    todo = Queue.new
    workers = Array.new(worker_count) do
      Thread.new do
        while (job = todo.deq)
          Exception.ignoring_exceptions { yield(job[0]) }
        end
      end
    end
    begin
      each { |element| todo << [element] }
    ensure
      # Always release the workers, even when #each raises part-way through;
      # otherwise every worker would stay blocked on the queue forever.
      workers.size.times { todo << nil }
      workers.each(&:join)
    end
    workers
  end
end
require "in_parallel"
class String
  # Returns a copy of the string with every whitespace character prefixed by
  # a backslash, so the text survives as a single word in a shell command or
  # glob pattern.
  #
  # Each whitespace character is escaped on its own and kept as-is: a run of
  # two spaces stays two (escaped) spaces and a tab stays a tab. Collapsing a
  # run into one escaped space would silently name a different path.
  #
  # Only whitespace is handled; other shell metacharacters are left untouched.
  #
  # @return [String] the escaped copy
  def term_escape
    gsub(/\s/) { |whitespace| "\\#{whitespace}" }
  end
end
# Root directory that receives the compressed copies.
OUTPUT_DIRECTORY = "/Volumes/OUTPUT/DATA"
# Root directory that is searched for input files.
INPUT_DIRECOTRY = "/Volumes/INPUT/DATA"
# Every .txt file below the input root, except paths that contain "Person".
input_pattern = "#{INPUT_DIRECOTRY}/**/*.txt".term_escape
files = Dir.glob(input_pattern).reject { |path| path =~ /Person/ }
# Compresses one input file with gzip into a sub-directory of OUTPUT_DIRECTORY.
#
# The destination directory is OUTPUT_DIRECTORY plus the name of the file's
# immediate parent directory only; deeper levels of the input path are not
# mirrored. Each shell command is echoed before it runs and whatever it
# writes to standard output is printed afterwards.
#
# @param file [String] path of the file to compress
# @return [nil]
def zip_file(file)
  file_name = File.basename(file)
  parent_name = File.basename(File.dirname(file))
  # Shellwords.escape protects every shell metacharacter (quotes, &, $, ;,
  # parentheses, ...), not just whitespace, so unusual file names can neither
  # break nor inject into the commands below.
  output_directory = Shellwords.escape(File.join(OUTPUT_DIRECTORY, parent_name))
  puts output_directory
  command = %Q(mkdir -p #{output_directory})
  puts command
  puts `#{command}`
  source = Shellwords.escape(file)
  archive_name = Shellwords.escape("#{file_name}.gz")
  command = %Q(time cat #{source} | gzip -c > #{output_directory}/#{archive_name})
  puts command
  puts `#{command}`
  puts "FINISHED: #{file_name}"
end
# Compress every selected file, spreading the zip_file calls over four worker threads.
files.in_parallel_n(4) { |path| zip_file(path) }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment