Created
March 23, 2022 14:20
-
-
Save veer66/8236122d4f9149f45671acbecd472ea8 to your computer and use it in GitHub Desktop.
Use Ruby's ractor to run Apertium tools
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Number of worker Ractors; input lines are dealt out to them round-robin.
NUMBER_OF_WORKERS = 8
# Runs one sentence through the external analysis pipeline (lt-proc piped
# into cg-proc) and returns the first line the pipeline prints.
#
# @param sent [String] text written to the pipeline's stdin
# @param line_no [Integer] index of the input line; accepted but not used here
# @return [String, nil] first output line as returned by IO#gets, or nil when
#   the pipeline produces no output
def tag(sent, line_no)
  pipeline = "lt-proc ../data/eng-tha.automorf.bin | cg-proc -n -1 ../data/eng-tha.rlx.bin"
  # IO.popen returns the value of its block, so the line read is the result.
  IO.popen(pipeline, "r+") do |f|
    f.write(sent)
    # Close our write end so the pipeline sees EOF on its stdin.
    f.close_write
    f.gets
  end
end
# Removes and returns the consecutive run of buffered results that starts at
# next_no, together with the first line number that is still missing.
#
# Presence is tested with key? rather than truthiness so that a nil result
# counts as "arrived" instead of blocking every later line.
#
# @param pending [Hash{Integer => String, nil}] results buffered by line number
# @param next_no [Integer] line number that must be emitted next
# @return [Array(Array<String, nil>, Integer)] the ready results in order and
#   the updated next line number
def take_ready_results(pending, next_no)
  ready = []
  while pending.key?(next_no)
    # delete keeps the buffer from growing with already-emitted lines.
    ready << pending.delete(next_no)
    next_no += 1
  end
  [ready, next_no]
end

# Ractor that prints results in input order. It receives
# {line_no:, tag_str:} messages, which may arrive out of order, buffers them,
# and prints each one as soon as all earlier line numbers have been printed.
# On :DONE it leaves the loop and yields :DONE.
output_actor = Ractor.new do
  pending = {}
  next_no = 0
  loop do
    msg = Ractor.receive
    break if msg == :DONE

    pending[msg[:line_no]] = msg[:tag_str]
    ready, next_no = take_ready_results(pending, next_no)
    # A nil result prints as an empty line, keeping output aligned with input.
    ready.each { |tag_str| puts tag_str }
  end
  Ractor.yield :DONE
end
# Pool of worker Ractors. Each worker receives {line_no:, sent:} messages,
# tags the sentence, and forwards {line_no:, tag_str:} to the output Ractor.
# On :DONE it leaves the loop and yields :DONE as its completion signal.
workers = Array.new(NUMBER_OF_WORKERS) do
  Ractor.new(output_actor) do |sink|
    loop do
      job = Ractor.receive
      break if job == :DONE

      sink.send({ line_no: job[:line_no], tag_str: tag(job[:sent], job[:line_no]) })
    end
    Ractor.yield :DONE
  end
end
# Read stdin line by line and deal the lines out to the workers round-robin.
# Each message carries the line's index so results can be re-ordered later.
$stdin.each_line.with_index do |line, line_no|
  # First replace the two-character sequence U+00AD U+2013 with " -", then
  # shift each of $ ^ = / < > { } [ ] @ \ up by 0x100100 code points.
  # NOTE(review): presumably these characters are reserved by the downstream
  # tools' stream format - confirm.
  encoded_line = line.chomp
                     .gsub("\u00AD\u2013", " -")
                     .gsub(/[$\^=\/<>{}\[\]@\\]/) { |s| (s.ord + 0x100100).chr("UTF-8") }
  workers[line_no % NUMBER_OF_WORKERS].send({ line_no: line_no, sent: encoded_line })
end

workers.each { |w| w.send :DONE }
# Wait for every worker to finish BEFORE stopping the output Ractor. A worker
# yields :DONE only after it has sent all of its results, so once every take
# has returned, the :DONE sent below is queued behind those results. Sending
# it earlier could end the output loop while results were still in flight.
workers.each { |w| p w.take }
output_actor.send :DONE
p output_actor.take
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment