Skip to content

Instantly share code, notes, and snippets.

@leonid-shevtsov
Created March 9, 2024 21:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save leonid-shevtsov/b51d412e2a2c8600b61592b352fb1ddc to your computer and use it in GitHub Desktop.
Save leonid-shevtsov/b51d412e2a2c8600b61592b352fb1ddc to your computer and use it in GitHub Desktop.
require 'strscan'
# Aggregates per-city temperature statistics from measurements.txt, whose
# lines have the form "city;temperature\n", and prints one
# "city=min/mean/max" line per city.
#
# The file is consumed in large binary chunks. Lines lying completely inside a
# chunk are parsed in place: their fields are copied into two reusable scratch
# strings instead of being sliced into new Strings. Only the line straddling a
# chunk boundary (and an unterminated final line) takes the allocating path.

# Per-city accumulator: [min, max, sum, count]. The initial min/max are
# sentinels; this assumes every reading lies strictly between them.
stats = Hash.new { |h, k| h[k] = [10_000, -10_000, 0, 0] }
SEMI = String.new(';', encoding: Encoding::BINARY)
BUFLEN = 128 * 1024 * 1024

# Chunk buffer that every readpartial call refills in place. Only capacity is
# reserved here; the previous contents were never read anyway.
buf = String.new(capacity: BUFLEN, encoding: Encoding::BINARY)

# Scratch strings holding the fields of the line currently being parsed.
# citylen/templen remember how many bytes the previous line stored, so that
# bytesplice replaces the old content completely.
city = String.new(capacity: 1024, encoding: Encoding::BINARY)
temp_s = String.new(capacity: 1024, encoding: Encoding::BINARY)
citylen = 0
templen = 0

# Bytes of a not-yet-terminated line, carried over to the next chunk.
remainder = String.new(encoding: Encoding::BINARY)

# Allocating path: splits a whole "city;temperature" line and records it.
# A trailing newline is harmless because String#to_f ignores trailing text.
record_line = lambda do |line|
  name, reading = line.split(SEMI, 2)
  value = reading.to_f
  entry = stats[name]
  entry[0] = value if value < entry[0]
  entry[1] = value if value > entry[1]
  entry[2] += value
  entry[3] += 1
end

File.open('measurements.txt', encoding: Encoding::BINARY) do |f| # rubocop:disable Metrics/BlockLength
  # The scanner keeps referring to buf, so it sees each refilled chunk.
  scanner = StringScanner.new(buf)
  begin
    loop do # rubocop:disable Metrics/BlockLength
      f.readpartial(BUFLEN, buf)
      scanner.reset

      # The head of this chunk completes the line left over from the last one.
      head = scanner.scan_until(/\n/)
      if head.nil?
        # No newline anywhere in this chunk: the pending line is still
        # incomplete, so keep accumulating it instead of failing on nil.
        remainder << buf
        next
      end
      record_line.call(remainder + head)

      # Start offset (in buf) of the line currently being parsed.
      citypos = scanner.pos
      loop do
        break unless scanner.skip_until(/;/)

        # scanner.pos is just past the ';', hence the -1.
        newcitylen = scanner.pos - citypos - 1
        city.bytesplice(0, citylen, buf, citypos, newcitylen)
        citylen = newcitylen
        temppos = scanner.pos
        # An unterminated line leaves citypos at its start, so the whole line
        # ends up in remainder below.
        break unless scanner.skip_until(/\n/)

        newtemplen = scanner.pos - temppos - 1
        temp_s.bytesplice(0, templen, buf, temppos, newtemplen)
        templen = newtemplen
        citypos = scanner.pos
        temp = temp_s.to_f
        # Looking up with the mutable scratch string is safe: on a miss,
        # Hash#[]= stores its own frozen copy of an unfrozen String key.
        existing = stats[city]
        min, max, sum, count = existing
        existing[0] = temp if temp < min
        existing[1] = temp if temp > max
        existing[2] = sum + temp
        existing[3] = count + 1
      end

      # Everything after the last complete line waits for the next chunk.
      remainder.bytesplice(0..-1, buf, citypos..-1)
    end
  rescue EOFError
    # readpartial reports end of file by raising; nothing more to read.
  end

  # A final line without a trailing newline is never completed by a later
  # chunk, so record it here. Leftovers lacking a ';' (e.g. a stray blank
  # line) are not a measurement and are ignored.
  record_line.call(remainder) if remainder.include?(SEMI)
end

stats.each do |name, (min, max, sum, count)|
  puts "#{name}=#{min}/#{sum / count}/#{max}"
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment