Created
September 5, 2014 22:56
-
-
Save grandadmiral-thrawn/b99be9a164ecb7d5be07 to your computer and use it in GitHub Desktop.
Rain Gauge QA Flagging Algorithm
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
easypre(file_in, fileout_1, fileout_2, fileout_3, raincol, dncol, humancol); | |
%%% EASYPRE.m will read an input file from a rain gauge as they are presented on the H.J. Andrews Portal site. | |
%%% EASYPRE.m performs a state transform algorithm to detect whether or not changes in gauge height are valid, | |
%%% and when they are not, flags them to a specific cause. One can also manually import data. Examples will be | |
%%% commented. | |
%%% file_in is a string to a filename of andrews data to import; fileout_1, fileout_2, and fileout_3 are strings to | |
%%% the names of export files, raincol is the column with the raingauge data (::integer), dncol is the column with | |
%%% the matlab datenum (::integer), and humancol is the column with the human date (::integer) | |
%%% IMPORT THE RAW DATA | |
d = importdata(file_in); | |
%/* Example for using raw CSV if you choose: | |
% import the concatenated CENMET data. | |
% d = importdata('ALL_CEN_PRE.csv'); | |
% d = importdata('ALL_UPL_PRE.csv'); */% | |
%%% Create a Results vector to hold ALL THE RECORDS! | |
%/* Andrews data will need 8 records */% | |
Results = cell(length(d)-1,8); | |
%%% parse the first two records using regex | |
FirstLine = regexp(d{1},',','split'); | |
SecondLine = regexp(d{2},',','split'); | |
%/* The column will need to be altered based on the site. NL4 gauges use 10, SH uses 18, and SA uses 12*/% | |
%%% Raw Gauge == Accumulated Gauge == Baseline | |
RawGauge = FirstLine{1,raincol}; AccGauge = FirstLine{1,raincol}; Baseline = FirstLine{1,raincol}; | |
%%% RawGauge ?= Accumulated Gauge ?= Baseline | |
RawGauge2 = SecondLine{1,raincol}; AccGauge2 = SecondLine{1,raincol}; Baseline2 = SecondLine{1,raincol}; | |
%%% Datestamps for records 1 and 2 | |
DN = str2num(FirstLine{1,dncol}); | |
DN2 = str2num(SecondLine{1,dncol}); | |
%%% Human Dates to be used in the output | |
HumanDate = FirstLine{1,humancol}; | |
HumanDate2 = SecondLine{1,humancol}; | |
vectordate = zeros(length(d),6); | |
vectordate(1,:) = datevec(DN); % vectorized date | |
vectordate(2,:) = datevec(DN2); % vectorized date | |
%%% Turn off warning about converting strings to doubles versus to numbers | |
warning off | |
%%% The difference between the raw record and the accumulated record for | |
%%% the first Difference, always accepted | |
%%% The corrected difference is the difference set to 0 | |
Diff = str2num(RawGauge) - str2num(AccGauge); Flag = 'A'; CorrDiff = 0; | |
%%% The difference between subsequent raw records for the second record; | |
%%% will be tested | |
Diff2 = str2num(RawGauge2) - str2num(RawGauge); | |
FirstRecord = {DN, HumanDate, str2num(AccGauge), str2num(RawGauge), str2num(Baseline), Diff, CorrDiff, Flag}; | |
%%% States based on Diff- there are RAINING, MAYBERAINING, and NOTRAINING. Actually, everything is a set or subset of | |
%%% NOTRAINING. But don't worry about that! | |
%%% start assuming it is RAINING - this is Oregon after all. | |
NotRaining = 0; | |
%%% start with no recent differences | |
Recent_Diffs = 0; | |
%%% If the difference is greater positive, the baseline rises, | |
%%% accumulation rises, and the data is accepted. | |
if Diff2 > 0; | |
Baseline2 = str2num(Baseline) + Diff2; | |
AccGauge2 = str2num(AccGauge) + Diff2; | |
Flag2 = 'A'; | |
CorrDiff2 = Diff2; | |
Recent_Diffs = Recent_Diffs + Diff2; | |
%%% If the difference is negative and it is much greater (1 order of | |
%%% magnitude give or take 0.1 to deal with the case of 0) | |
%%% then the Baseline is reset to the raw gauge and it | |
%%% is flagged as a maintenance, no accumulation occurs. The flag is 'R' | |
%%% The corrected difference is 0 since the difference is non physical | |
elseif Diff2 <= 0 & abs(Diff2)/10 >= abs(Diff)+0.1; % add the 0.1 because of the 0 case | |
Baseline2 = str2num(RawGauge2); % Reset the baseline | |
Flag2 = 'R'; % R for reset | |
CorrDiff2 = 0; % The difference for this step isn't real, so it's 0 | |
AccGauge2 = str2num(AccGauge); % The accumulated gauge does not change | |
%%% If the differences is negative and it is smaller than the previous | |
%%% difference after the aforementioned one-order-of-magnitude-iSH buffer | |
%%% is applied, then the Baseline is adjusted to this negative difference, | |
%%% but the accumulation remains, and this baseline is used for the next | |
%%% measurement. The flag is 'Ae' for accepted but maybe evaporation | |
%%% The corrected difference is set to 0, since there isn't really a difference | |
elseif Diff2 <= 0 & abs(Diff2)/10 < abs(Diff) + 0.1; | |
Baseline2 = str2num(Baseline); | |
Flag2 = 'Ae'; | |
CorrDiff2 = 0; | |
AccGauge2 = str2num(AccGauge) + CorrDiff2; | |
Recent_Diffs = Recent_Diffs + Diff2; | |
%%% If all these cases fail (for example a NaN in the data, which is the | |
%%% fail case I can think of), then the Flag is 'M' and the data is just | |
%%% repeated from the previous good measurement to keep the program | |
%%% running. | |
else Baseline2 = Baseline; Flag2 = 'M'; AccGauge2 = AccGauge; RawGauge2 = RawGauge; CorrDiff2 = Diff2; Recent_Diffs = Recent_Diffs + Diff2; | |
end | |
%%% The second record reflects these criteria | |
SecondRecord = {DN2, HumanDate2, AccGauge2, str2num(RawGauge2), Baseline2, Diff2, CorrDiff2, Flag2}; | |
%%% Both the second and first record are given to the results. | |
Results(1,:) = FirstRecord; | |
Results(2,:) = SecondRecord; | |
%%% holds output for midnight to midnight | |
M2M = zeros(2000,4); | |
%%% holds the output for sums from m to m | |
Daily_Acc = zeros(2000,2); | |
%%% Trackers | |
M2Mcount = 0; % start count for midnight to midnight at 0; | |
Count = 0; % count days during Raining state that are not very different from the prior day | |
MaybeDry = 0; % only use the counter for the not different days when the MaybeDry flag is on | |
Bomb = zeros(2000,1); | |
%%% ENTER LOOP ASSUMING RAINY SEASON | |
for i = 3:length(Results); | |
%%% The third line is parsed | |
ThirdLine = regexp(d{i},',','split'); | |
%%% Raw measurement is taken from the third line | |
RawGauge3 = ThirdLine{1,raincol}; | |
%%% Comparative measurements are taken from the Results array, | |
%%% so they are all numeric (see else clause below!) | |
CompareRaw = Results{i-1,4}; CompareAcc = Results{i-1,3}; | |
CompareBase = Results{i-1,5}; CompareDiff = Results{i-1,6}; CompCorr = Results{i-1,7}; | |
%%% The third DN comes from the raw data | |
DN3 = str2num(ThirdLine{1,dncol}); | |
HumanDate3 = ThirdLine{1,humancol}; | |
%%% calculate the datevec for midnight to midnight | |
vectordate(i,:) = datevec(DN3); | |
%%% if it is the 0 hour | |
if vectordate(i,4) == 0; | |
%%% and if it is also the 0th minute | |
if round(vectordate(i,5)/5)*5 == 0 | round(vectordate(i,5)/5)*5 == 60; | |
%%% This part appears to work now | |
%%% catch the RawGauge Measurement | |
catchhour = str2num(RawGauge3); | |
%%% Add one to the midnight to midnight count | |
M2Mcount = M2Mcount + 1; | |
%%% set the midnight to midnight count's first value to the datenum | |
M2M(M2Mcount,1) = DN3; | |
%%% set the midnight to midnight count's second value to the caught raw value | |
M2M(M2Mcount,2) = catchhour; | |
%%% record the index number for acc total | |
M2M(M2Mcount,4) = i; | |
%%% if there are more than 2 values there, | |
if M2Mcount >= 2; | |
%%% calculate the daily midnight to midnight difference and put it in M2M | |
M2M(M2Mcount,3) = M2M(M2Mcount,2) - M2M(M2Mcount-1,2); | |
end | |
end | |
end | |
%%% The difference is the difference between the raw measurement | |
%%% and the running "baseline" which is not the Same thing as the raw | |
%%% or the accumulated (represents the 'state of the gauge' regardless of loss | |
%%% The actual change in this interval is the change between the raw gauge and the baseline | |
Diff3 = str2num(RawGauge3) - CompareBase; % the Differences are comparisons of the Raw and the Base | |
%%% when it is RAINING! | |
if NotRaining == 0; | |
%% figure out what the measurement of recent differences was before went into the loop | |
PriorRD = Recent_Diffs; % catch the previous measurements's Recent Diff | |
%%% if The maybe dry flag is on and the recent diffs times a factor | |
if MaybeDry == 1; | |
% if the Recent differences are within a 1 and a half mm range of the current differences | |
% add to the count that it could be dry | |
if Recent_Diffs - 1.2 <= PriorRD | PriorRD <= Recent_Diffs + 1.2; | |
Count = Count+1; | |
else Count = 0; | |
end | |
%%% If it's been more than about 6 days of no rain difference, trigger the change for the next measurement | |
if Count > 1500; | |
NotRaining = 1; | |
MaybeDry = 0; | |
Recent_Diffs = 0; | |
Count = 0; | |
else NotRaining = 0; | |
end | |
end | |
%% if the difference is positive or 0 it's probably real in the rainy season | |
if Diff3 >= 0; | |
Baseline3 = CompareBase + Diff3; % baseline accumulates rain | |
if Diff3 < 4; | |
Flag3 = 'A'; % a positive change in summmer is probably a non event | |
else Flag3 = 'Bomb'; | |
end | |
AccGauge3 = CompareAcc + Diff3; % accumulation occurs | |
CorrDiff3 = Diff3; % difference is correct | |
Recent_Diffs = Recent_Diffs + Diff3; % recent differences increases | |
%%% if a very large drop occurs then the gauge has been reset | |
elseif Diff3 < 0 & abs(Diff3)/10 >= abs(str2num(RawGauge3)- CompareRaw)+0.1; | |
Baseline3 = CompareRaw; % the baseline is reset | |
Flag3 = 'R'; % the flag is R | |
CorrDiff3 = 0; % there isn't a real difference | |
AccGauge3 = CompareAcc; % the accumulation is the SHme | |
%%% recent diffs does not change | |
%%% if the difference is less than 0 and the log of this difference is | |
%%% less than the abs of the comparative difference plus a small factor | |
%%% for the case of 0, then it's just regular old evaporation, and gets | |
%%% a 'Q' | |
elseif Diff3 < 0 & abs(Diff3)/10 < abs(str2num(RawGauge3)- CompareRaw) + 0.1; | |
Baseline3 = CompareBase; % the baseline does not change | |
Flag3 = 'Ae'; % the difference is probably evaporation | |
CorrDiff3 = 0; % there isn't a corrected difference | |
AccGauge3 = CompareAcc; % the accumulation does not change | |
% Recent_Diffs = Recent_Diffs + Diff3 % counts the small flux, in case there is a leak. | |
elseif CompareRaw == NaN && str2num(RawGauge3) ~= NaN; | |
Baseline3 = CompareRaw; | |
Flag3 = 'M'; | |
AccGauge3 = CompareAcc; | |
RawGauge3 = num2str(CompareRaw); | |
%%% FOR WHEN NOTHING HAPPENS LIKE NAN | |
else Baseline3 = CompareBase; Flag3 = 'M'; AccGauge3 = CompareAcc; RawGauge3 = num2str(CompareRaw); | |
end | |
%% no negative fluxes are counted in the rainy time's recent diffs, so we can check to see how long it | |
%% stays the SHme for. If the recent diffs are the SHme as the previous diffs and MaybeDry is not initialized | |
%% initialize it and start counting | |
if Recent_Diffs == PriorRD & MaybeDry == 0; | |
MaybeDry = 1; | |
Count = Count + 1 | |
end | |
%%% The baseline can stay the Same as it is in this case since it's probably not draining? | |
ThirdRecord = {DN3, HumanDate3, AccGauge3, str2num(RawGauge3), Baseline3, Diff3, CorrDiff3, Flag3}; | |
Results(i,:) = ThirdRecord; | |
clearvars ThirdRecord | |
elseif NotRaining == 1; | |
%%% similar to above, if the Diff > 0 or = 0 then the base and | |
%%% accumulation reflect this change and it is accepted | |
if Diff3 >= 0 | |
Baseline3 = CompareBase; % a positive change in summer does not change the baseline | |
if Diff3 < 4; | |
Flag3 = 'Ae'; % a positive change in summmer is probably a non event | |
else Flag3 = 'Bomb'; | |
end | |
AccGauge3 = CompareAcc + Diff3; | |
CorrDiff3 = 0; | |
Recent_Diffs = Recent_Diffs + Diff3; % the recent diffs counts this small flux | |
%%% if a very large drop occurs then the gauge has been reset | |
elseif Diff3 < 0 & abs(Diff3)/10 >= abs(str2num(RawGauge3)- CompareRaw)+0.1; | |
Baseline3 = CompareRaw; % the baseline is reset | |
Flag3 = 'R'; % the flag is R | |
CorrDiff3 = 0; % there isn't a real difference | |
AccGauge3 = CompareAcc; % the accumulation is the SHme | |
%%% Recent diffs ignores the unreal flux | |
%%% a small change in the summer is probbaly a non-event | |
elseif Diff3 < 0 & abs(Diff3)/10 < abs(str2num(RawGauge3)- CompareRaw) + 0.1; | |
Baseline3 = CompareBase; % the baseline does not change | |
Flag3 = 'Ae'; % the difference is probably evaporation | |
CorrDiff3 = 0; % there isn't a corrected difference | |
AccGauge3 = CompareAcc; % the accumulation does not change | |
elseif CompareRaw == NaN && str2num(RawGauge3) ~= NaN; | |
Baseline3 = CompareRaw; | |
Flag3 = 'M'; | |
AccGauge3 = CompareAcc; | |
RawGauge3 = num2str(CompareRaw); | |
else Baseline3 = CompareBase; Flag3 = 'M'; AccGauge3 = CompareAcc; RawGauge3 = num2str(CompareRaw); | |
end % end the loop of simmer conditions | |
%%% if the recent_diffs is more than 2, move into winter mode. | |
if Recent_Diffs > 2; | |
NotRaining = 0; | |
Baseline3 = str2num(RawGauge3); % reset the baseline | |
MaybeDry = 0; | |
Flag = 'Rw'; % flag for winter mode switch | |
Recent_Diffs = CorrDiff3; % reset the difference counter | |
end | |
ThirdRecord = {DN3, HumanDate3, AccGauge3, str2num(RawGauge3), Baseline3, Diff3, CorrDiff3, Flag3}; | |
Results(i,:) = ThirdRecord; | |
clearvars ThirdRecord | |
end | |
end | |
fprintf(1,'%s\n','Hey, its done'); | |
%filename2 = 'Rawresults_UPLMET_SH_5min_09042014.csv'; | |
fid1 = fopen(fileout_1,'w'); | |
fprintf(fid1,'%s,%s,%s,%s,%s,%s,%s\n','GAUGENAME','DATENUM','RAWGAUGE','RAWDIFF','ADJDIFF','FLAG'); | |
for i = 1:length(Results) | |
fprintf(fid1,'%s,%s, %4.2f, %4.2f, %4.2f, %s\n', 'UPLMET_SH',Results{i,2},Results{i,4}, Results{i,6}, Results{i,7}, Results{i,8}); | |
end | |
try | |
numerical_diff = cell2mat(Results(:,7)); | |
filename = fileout_2; | |
fid = fopen(filename,'w'); | |
fprintf(fid,'%s,%s\n','DATE','SUM RESULTS') | |
value = zeros(819,1) | |
somenum = M2M(:,1); | |
for j = 1:length(somenum)-1; | |
value(j) = sum(numerical_diff(M2M(j,4):M2M(j+1,4))); | |
fprintf(fid,'%d,%4.2f\n',joe(j+1),value(j)); | |
end | |
fclose(fid) | |
catch ME | |
end | |
filename1 = fileout_3; | |
fid2 = fopen(filename1,'w'); | |
fprintf(fid2,'%s,%s,%s,%s,%s\n','GaugeName','DATE','algorithm_precip','m2m_precip','index_in_raw'); | |
ds = datestr(M2M(1:819,1)); | |
ds = cellstr(ds); | |
for i = 1:819; | |
fprintf(fid2,'%s,%s,%4.2f,%4.2f,%d\n','UPLMET_SH',ds{i},value(i),M2M(i+1,3),M2M(i,4)); | |
end | |
fclose(fid2) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment