Created
August 26, 2022 09:33
-
-
Save noaacrappy/0148630253ea03c313a9bf976978cb26 to your computer and use it in GitHub Desktop.
Find NOAA stations that have not reported recently compared to most recent final data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require "date"

# Root directory under which all input data files are looked up.
DATA_DIR = "data".freeze

# Directory containing the per-station "final" monthly files
# (named "<station_id>.FLs.52j.<ELEMENT>").
FINAL_DIR = "#{DATA_DIR}/ushcn.v2.5.5.20220825".freeze

# Directory containing the per-station daily files (named "<station_id>.dly").
DAILY_DIR = "#{DATA_DIR}/ghcnd_hcn".freeze

# Element being examined. Used verbatim as the final-file extension and
# upper-cased to select matching records in the daily files.
ELEMENT = "tmax".freeze

# One row of the station list; every field is kept as a String.
Station = Struct.new(:id, :lat, :lon, :state, :name)
# The format of `ushcn-v2.5-stations.txt` is specified by
# https://www1.ncdc.noaa.gov/pub/data/ushcn/v2.5/readme.txt
#
# Parses the fixed-width station list at `filename` and returns a Hash that
# maps each station id (String) to its Station record. The latitude, longitude
# and name fields are whitespace-stripped; all fields stay Strings.
#
# Lines too short to contain the id, coordinates and state columns (for
# example blank lines) are skipped instead of raising.
#
# Raises whatever File.foreach raises when the file cannot be read.
def station_db_make(filename)
  station_db = {}
  File.foreach(filename, chomp: true) do |line|
    # Everything up to and including the state field lives in the first 40
    # characters; anything shorter cannot be a station record.
    next if line.length < 40

    station = Station.new(
      line[0, 11],
      line[11, 9].strip,
      line[21, 9].strip,
      line[38, 2],
      line[41, 30].to_s.strip # the name may be absent on a 40-character line
    )
    station_db[station.id] = station
  end
  station_db
end
# Finds the most recent month for which the station's "final" file holds a
# value other than the "-9999" missing marker.
#
# station_id - station id used to build the file name
#              "#{FINAL_DIR}/<station_id>.FLs.52j.#{ELEMENT}".
#
# Each line is read as: year in columns 13-16, followed by up to twelve
# 9-character monthly fields. Lines are scanned from the end of the file
# backwards. Lines too short to carry any monthly field, and lines whose months
# are all missing (e.g. a year that has only just started), are skipped.
#
# Returns [year, month] as Integers, month being 1-based.
# Raises RuntimeError ("not found") when no line has a usable month, and
# propagates the File error when the file cannot be read.
def final_get_most_recent_month(station_id)
  filename = "#{FINAL_DIR}/#{station_id}.FLs.52j.#{ELEMENT}"
  File.readlines(filename, chomp: true).reverse_each do |line|
    fields = line[16..]
    next if fields.nil? # blank or truncated line: no monthly data at all

    # Only the first twelve fields are months. Indexing from the front keeps
    # the month number right even when the line has fewer or more fields.
    months = chunkify_string(fields, 9).first(12)
    last_reported = months.rindex { |chunk| !chunk.include?("-9999") }
    next if last_reported.nil? # every month missing; try the previous line

    return [line[12, 4].to_i, last_reported + 1]
  end
  raise "not found"
end
# Finds the most recent day for which the station's daily file holds an
# ELEMENT value other than the "-9999" missing marker.
#
# station_id - station id; "USH" is replaced by "USC" to build the file name
#              "#{DAILY_DIR}/<daily_station_id>.dly".
#
# Each line is read as: year in columns 12-15, month in columns 16-17, element
# name in columns 18-21, followed by up to thirty-one 8-character daily fields.
# Only lines whose element equals ELEMENT (upper-cased) are considered, scanned
# from the end of the file backwards. Lines whose days are all missing are
# skipped.
#
# Returns [year, month, day] as Integers, day being 1-based.
# Raises RuntimeError ("not found") when no matching line has a usable day, and
# propagates the File error when the file cannot be read.
def daily_get_most_recent_day(station_id)
  daily_station_id = station_id.gsub("USH", "USC")
  filename = "#{DAILY_DIR}/#{daily_station_id}.dly"
  element = ELEMENT.upcase # hoisted: constant for every line
  records = File.readlines(filename, chomp: true).select { |line| line[17, 4] == element }
  records.reverse_each do |line|
    # Only the first 31 fields are days. Indexing from the front keeps the day
    # number right even when the line has fewer or more fields.
    days = chunkify_string(line[21..], 8).first(31)
    last_reported = days.rindex { |chunk| !chunk.include?("-9999") }
    next if last_reported.nil? # every day missing; try the previous line

    return [line[11, 4].to_i, line[15, 2].to_i, last_reported + 1]
  end
  raise "not found"
end
# Splits a string into consecutive pieces of `size` characters.
#
# s    - the String to split; nil is treated as an empty string.
# size - positive Integer length of each piece.
#
# Returns an Array of Strings. The last piece may be shorter than `size` when
# the length is not an exact multiple; an empty (or nil) input yields [].
# Raises ArgumentError when `size` is not a positive Integer.
def chunkify_string(s, size)
  unless size.is_a?(Integer) && size.positive?
    raise ArgumentError, "size must be a positive integer (got #{size.inspect})"
  end

  text = s.to_s
  (0...text.length).step(size).map { |offset| text[offset, size] }
end
# Script entry point. For every station in the station list, prints one
# comma-separated line to stdout:
#
#   id,lat,lon,state,name,d_year,d_month,d_day,f_year,f_month,num_days,is_zombie
#
# d_* is the most recent daily reading, f_* the most recent final month, and
# num_days the absolute number of days between the first day of that final
# month and the daily reading (empty when there is no daily data).
#
# Stations whose final data cannot be read are reported on stderr and skipped.
# Stations whose daily data cannot be read are reported on stderr and printed
# with a 0,0,0 daily date and is_zombie = true.
if $PROGRAM_NAME == __FILE__
  station_db = station_db_make("#{DATA_DIR}/ushcn-v2.5-stations.txt")
  station_db.each do |station_id, station|
    f_year, f_month = begin
      final_get_most_recent_month(station_id)
    rescue => e
      $stderr.puts "#{station_id}: final: unable to get most recent month: #{e}"
      next
    end
    d_year, d_month, d_day = begin
      daily_get_most_recent_day(station_id)
    rescue => e
      $stderr.puts "#{station_id}: daily: unable to get most recent day: #{e}"
      [0, 0, 0]
    end

    # Stays nil (printed as an empty column) when there is no daily date.
    num_days = nil
    if [d_year, d_month, d_day] == [0, 0, 0]
      # This station is a zombie because its daily data file is missing.
      is_zombie = true
    else
      begin
        d_dt = Date.new(d_year, d_month, d_day)
        f_dt = Date.new(f_year, f_month, 1)
      rescue ArgumentError => e
        # Malformed data produced an impossible date; report this station and
        # carry on with the rest instead of aborting the whole run.
        $stderr.puts "#{station_id}: invalid date in data: #{e}"
        next
      end
      lag_days = (f_dt - d_dt).to_i
      num_days = lag_days.abs
      # This station is a zombie if its most recent daily reading is more than
      # 60 days older than the most recent final month. The comparison is
      # signed on purpose: a daily reading that is newer than the final month
      # must never count as a zombie, however large the gap.
      is_zombie = lag_days > 60
    end

    puts [
      station.id, station.lat, station.lon, station.state, station.name,
      d_year, d_month, d_day, f_year, f_month, num_days, is_zombie,
    ].join(",")
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment