Created
September 1, 2022 07:22
-
-
Save noaacrappy/b941a4099afc7e813701f226adf9cc4c to your computer and use it in GitHub Desktop.
Compute how many values were estimated by year
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ruby compute-final-percent-estimated.rb data/ushcn.v2.5.5.20220825/*.FLs.52j.tmax > tmax-pct-estimated.csv | |
def db_make(filenames) | |
db = {} | |
filenames.each do |filename| | |
db_update(db, filename) | |
end | |
db | |
end | |
Record = Struct.new(:num_not_estimated, :num_estimated) | |
# The format of `*.FLs.52j.{tmin,tmax,tavg}` files is specified in | |
# https://www1.ncdc.noaa.gov/pub/data/ushcn/v2.5/readme.txt | |
def db_update(db, filename) | |
filename_station_id = File.basename(filename).split(".").first | |
data = File.read(filename) | |
lines = data.split("\n") | |
lines.each_with_index do |line, idx| | |
station_id = line[0,11] | |
raise "#{filename} line #{idx + 1}: bad station_id #{station_id}" if station_id != filename_station_id | |
year = line[12,4].to_i | |
unless db.has_key?(year) | |
db[year] = Record.new(0, 0) | |
end | |
raise "#{filename} line #{idx + 1}: bad year #{year}" unless (1700..2100).include?(year) | |
chunks = chunkify_string(line[16..], 9) | |
chunks.each do |chunk| | |
value = chunk[0,6].to_i | |
# -9999 indicates no value present. This of course is not an estimate, | |
# but neither is it a genuine reading. For now, let's skip it, but there | |
# may be some argument for counting it among the "not estimated". | |
next if value == -9999 | |
if chunk[6] == "E" | |
db[year].num_estimated += 1 | |
else | |
db[year].num_not_estimated += 1 | |
end | |
end | |
end | |
end | |
def chunkify_string(s, size) | |
(0 .. (s.length - 1) / size).map { |i| s[i * size,size] } | |
end | |
def db_dump(db) | |
db.keys.sort.each do |year| | |
record = db[year] | |
total = record.num_estimated + record.num_not_estimated | |
percent_estimated = record.num_estimated.to_f / total * 100 | |
puts "#{year},#{"%.2f" % percent_estimated}" | |
end | |
end | |
if $0 == __FILE__ | |
filenames = ARGV | |
if filenames.empty? | |
$stderr.puts "<*.FLs.52j.{tmin,tmax,tavg}>" | |
exit 1 | |
end | |
$stderr.puts "#{filenames.length} files" | |
db = db_make(filenames) | |
db_dump(db) | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment