Skip to content

Instantly share code, notes, and snippets.

@noaacrappy
Created August 26, 2022 09:33
Show Gist options
  • Save noaacrappy/0148630253ea03c313a9bf976978cb26 to your computer and use it in GitHub Desktop.
Save noaacrappy/0148630253ea03c313a9bf976978cb26 to your computer and use it in GitHub Desktop.
Find NOAA stations that have not reported recently compared to most recent final data
require "date"
# Root directory under which the station list and the two data directories
# below are looked up.
DATA_DIR = "data"
# Directory containing one "final" file per station, named
# "<station_id>.FLs.52j.<ELEMENT>"; read by final_get_most_recent_month.
FINAL_DIR = "#{DATA_DIR}/ushcn.v2.5.5.20220825"
# Directory containing one daily file per station, named
# "<daily_station_id>.dly"; read by daily_get_most_recent_day.
DAILY_DIR = "#{DATA_DIR}/ghcnd_hcn"
# Element to inspect. Used verbatim as the suffix of the final file name and
# upper-cased to select matching records in the daily file.
ELEMENT = "tmax"
# One record of the station list, as built by station_db_make. All fields are
# kept as strings exactly as sliced from the fixed-width line.
Station = Struct.new(:id, :lat, :lon, :state, :name)
# Builds a lookup table of stations from the fixed-width station list.
#
# The format of `ushcn-v2.5-stations.txt` is specified by
# https://www1.ncdc.noaa.gov/pub/data/ushcn/v2.5/readme.txt
#
# Blank lines are skipped. On a truncated line, any field that lies beyond
# the end of the line becomes an empty string instead of raising.
#
# @param filename [String] path to the station list file
# @return [Hash{String => Station}] stations keyed by the first 11 characters
#   of their line (the station id)
# @raise [SystemCallError] if the file cannot be opened or read
def station_db_make(filename)
  station_db = {}
  # Stream the file line by line rather than loading it whole.
  File.foreach(filename) do |raw|
    # String#chomp removes "\n", "\r\n" or "\r", so CRLF files parse cleanly.
    line = raw.chomp
    next if line.strip.empty?

    # String#[] returns nil when the start offset is past the end of the
    # string; to_s turns that into "" so short lines cannot crash the parse.
    station = Station.new(
      line[0, 11],
      line[11, 9].to_s.strip,
      line[21, 9].to_s.strip,
      line[38, 2].to_s,
      line[41, 30].to_s.strip
    )
    station_db[station.id] = station
  end
  station_db
end
# Finds the most recent month for which a station's final file holds a value.
#
# Each line is read as: year at offset 12 (4 characters), followed from
# offset 16 by up to twelve 9-character monthly cells. A cell containing
# "-9999" (or a blank cell) counts as "no value". Lines are scanned from the
# end of the file backwards, so the first line with any value wins.
#
# The month is derived from the position of the last valued cell counted from
# the start of the line, so trailing characters (for example the "\r" of a
# CRLF file) or a short line cannot shift the result.
#
# @param station_id [String] station id used to build the file name
# @return [Array(Integer, Integer)] [year, month] with month in 1..12
# @raise [RuntimeError] "not found" if no line holds any value
# @raise [SystemCallError] if the file cannot be read
def final_get_most_recent_month(station_id)
  filename = "#{FINAL_DIR}/#{station_id}.FLs.52j.#{ELEMENT}"
  File.readlines(filename).reverse_each do |raw|
    line = raw.chomp
    # Blank or truncated lines carry no monthly cells at all.
    next if line.length <= 16

    year = line[12, 4].to_i
    # Only the first twelve cells are months; ignore anything after them.
    months = chunkify_string(line[16..], 9).first(12)
    last_idx = months.rindex { |c| !c.strip.empty? && !c.include?("-9999") }
    # Every cell is missing (maybe it's January?): look at the previous line.
    next if last_idx.nil?

    return [year, last_idx + 1]
  end
  raise "not found"
end
# Finds the most recent day for which a station's daily file holds a value
# of ELEMENT.
#
# The daily file is looked up under the station id with "USH" replaced by
# "USC". Each line is read as: year at offset 11 (4 characters), month at
# offset 15 (2 characters), element name at offset 17 (4 characters),
# followed from offset 21 by up to thirty-one 8-character daily cells. A cell
# containing "-9999" (or a blank cell) counts as "no value". Only lines whose
# element equals ELEMENT upper-cased are considered, scanning from the end of
# the file backwards.
#
# The day is derived from the position of the last valued cell counted from
# the start of the line, so trailing characters (for example the "\r" of a
# CRLF file) or a truncated line cannot shift the result.
#
# @param station_id [String] station id as used in the station list
# @return [Array(Integer, Integer, Integer)] [year, month, day], day in 1..31
# @raise [RuntimeError] "not found" if no matching line holds any value
# @raise [SystemCallError] if the file cannot be read
def daily_get_most_recent_day(station_id)
  daily_station_id = station_id.gsub("USH", "USC")
  filename = "#{DAILY_DIR}/#{daily_station_id}.dly"
  wanted = ELEMENT.upcase
  File.readlines(filename).reverse_each do |raw|
    line = raw.chomp
    # A match here also guarantees the line is at least 21 characters long.
    next unless line[17, 4] == wanted

    # Only the first thirty-one cells are days; ignore anything after them.
    days = chunkify_string(line[21..], 8).first(31)
    last_idx = days.rindex { |c| !c.strip.empty? && !c.include?("-9999") }
    # Every cell is missing: look at the previous matching line.
    next if last_idx.nil?

    return [line[11, 4].to_i, line[15, 2].to_i, last_idx + 1]
  end
  raise "not found"
end
# Cuts a string into consecutive pieces of a fixed width.
#
# @param s [String] text to cut up
# @param size [Integer] width of each piece, in characters
# @return [Array<String>] the pieces in order; the last one may be shorter
#   than +size+, and an empty string yields an empty array
def chunkify_string(s, size)
  # Index of the final piece; -1 for an empty string, which gives no pieces.
  last_piece = (s.length - 1) / size
  0.upto(last_piece).map do |n|
    s[n * size, size]
  end
end
# Script entry point: for every station in the station list, print one
# comma-separated line with the station fields, its most recent daily reading
# date, its most recent final year/month, the day gap between the two, and
# whether the station is considered a zombie.
if $0 == __FILE__
  stations = station_db_make("#{DATA_DIR}/ushcn-v2.5-stations.txt")

  stations.each_key do |station_id|
    # Without final data there is nothing to compare against: report and skip.
    begin
      f_year, f_month = final_get_most_recent_month(station_id)
    rescue => e
      $stderr.puts "#{station_id}: final: unable to get most recent month: #{e}"
      next
    end

    # A failed daily lookup is reported and recorded as the 0/0/0 sentinel.
    begin
      d_year, d_month, d_day = daily_get_most_recent_day(station_id)
    rescue => e
      $stderr.puts "#{station_id}: daily: unable to get most recent day: #{e}"
      d_year, d_month, d_day = 0, 0, 0
    end

    # Stays nil (printed as an empty field) when there is no daily date.
    num_days = nil
    is_zombie =
      if [d_year, d_month, d_day] == [0, 0, 0]
        # This station is a zombie because its daily data could not be read.
        true
      else
        latest_daily = Date.new(d_year, d_month, d_day)
        latest_final = Date.new(f_year, f_month, 1)
        num_days = (latest_final - latest_daily).to_i.abs
        # Zombie when the most recent daily reading is more than 60 days away
        # from the first day of the most recent final month.
        num_days > 60
      end

    station = stations[station_id]
    station_fields = %i[id lat lon state name].map { |field| station[field] }
    report_fields = [d_year, d_month, d_day, f_year, f_month, num_days, is_zombie]
    puts (station_fields + report_fields).join(",")
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment