Skip to content

Instantly share code, notes, and snippets.

@grandadmiral-thrawn
Created September 5, 2014 22:56
Show Gist options
  • Save grandadmiral-thrawn/b99be9a164ecb7d5be07 to your computer and use it in GitHub Desktop.
Save grandadmiral-thrawn/b99be9a164ecb7d5be07 to your computer and use it in GitHub Desktop.
Rain Gauge QA Flagging Algorithm
easypre(file_in, fileout_1, fileout_2, fileout_3, raincol, dncol, humancol);
%%% EASYPRE.m will read an input file from a rain gauge as they are presented on the H.J. Andrews Portal site.
%%% EASYPRE.m performs a state transform algorithm to detect whether or not changes in gauge height are valid,
%%% and when they are not, flags them to a specific cause. One can also manually import data. Examples will be
%%% commented.
%%% file_in is a string to a filename of andrews data to import; fileout_1, fileout_2, and fileout_3 are strings to
%%% the names of export files, raincol is the column with the raingauge data (::integer), dncol is the column with
%%% the matlab datenum (::integer), and humancol is the column with the human date (::integer)
%%% IMPORT THE RAW DATA
d = importdata(file_in);
%/* Example for using raw CSV if you choose:
% import the concatenated CENMET data.
% d = importdata('ALL_CEN_PRE.csv');
% d = importdata('ALL_UPL_PRE.csv'); */%
%%% Create a Results vector to hold ALL THE RECORDS!
%/* Andrews data will need 8 records */%
Results = cell(length(d)-1,8);
%%% parse the first two records using regex
FirstLine = regexp(d{1},',','split');
SecondLine = regexp(d{2},',','split');
%/* The column will need to be altered based on the site. NL4 gauges use 10, SH uses 18, and SA uses 12*/%
%%% Raw Gauge == Accumulated Gauge == Baseline
RawGauge = FirstLine{1,raincol}; AccGauge = FirstLine{1,raincol}; Baseline = FirstLine{1,raincol};
%%% RawGauge ?= Accumulated Gauge ?= Baseline
RawGauge2 = SecondLine{1,raincol}; AccGauge2 = SecondLine{1,raincol}; Baseline2 = SecondLine{1,raincol};
%%% Datestamps for records 1 and 2
DN = str2num(FirstLine{1,dncol});
DN2 = str2num(SecondLine{1,dncol});
%%% Human Dates to be used in the output
HumanDate = FirstLine{1,humancol};
HumanDate2 = SecondLine{1,humancol};
vectordate = zeros(length(d),6);
vectordate(1,:) = datevec(DN); % vectorized date
vectordate(2,:) = datevec(DN2); % vectorized date
%%% Turn off warning about converting strings to doubles versus to numbers
warning off
%%% The difference between the raw record and the accumulated record for
%%% the first Difference, always accepted
%%% The corrected difference is the difference set to 0
Diff = str2num(RawGauge) - str2num(AccGauge); Flag = 'A'; CorrDiff = 0;
%%% The difference between subsequent raw records for the second record;
%%% will be tested
Diff2 = str2num(RawGauge2) - str2num(RawGauge);
FirstRecord = {DN, HumanDate, str2num(AccGauge), str2num(RawGauge), str2num(Baseline), Diff, CorrDiff, Flag};
%%% States based on Diff- there are RAINING, MAYBERAINING, and NOTRAINING. Actually, everything is a set or subset of
%%% NOTRAINING. But don't worry about that!
%%% start assuming it is RAINING - this is Oregon after all.
NotRaining = 0;
%%% start with no recent differences
Recent_Diffs = 0;
%%% If the difference is greater positive, the baseline rises,
%%% accumulation rises, and the data is accepted.
if Diff2 > 0;
Baseline2 = str2num(Baseline) + Diff2;
AccGauge2 = str2num(AccGauge) + Diff2;
Flag2 = 'A';
CorrDiff2 = Diff2;
Recent_Diffs = Recent_Diffs + Diff2;
%%% If the difference is negative and it is much greater (1 order of
%%% magnitude give or take 0.1 to deal with the case of 0)
%%% then the Baseline is reset to the raw gauge and it
%%% is flagged as a maintenance, no accumulation occurs. The flag is 'R'
%%% The corrected difference is 0 since the difference is non physical
elseif Diff2 <= 0 & abs(Diff2)/10 >= abs(Diff)+0.1; % add the 0.1 because of the 0 case
Baseline2 = str2num(RawGauge2); % Reset the baseline
Flag2 = 'R'; % R for reset
CorrDiff2 = 0; % The difference for this step isn't real, so it's 0
AccGauge2 = str2num(AccGauge); % The accumulated gauge does not change
%%% If the differences is negative and it is smaller than the previous
%%% difference after the aforementioned one-order-of-magnitude-iSH buffer
%%% is applied, then the Baseline is adjusted to this negative difference,
%%% but the accumulation remains, and this baseline is used for the next
%%% measurement. The flag is 'Ae' for accepted but maybe evaporation
%%% The corrected difference is set to 0, since there isn't really a difference
elseif Diff2 <= 0 & abs(Diff2)/10 < abs(Diff) + 0.1;
Baseline2 = str2num(Baseline);
Flag2 = 'Ae';
CorrDiff2 = 0;
AccGauge2 = str2num(AccGauge) + CorrDiff2;
Recent_Diffs = Recent_Diffs + Diff2;
%%% If all these cases fail (for example a NaN in the data, which is the
%%% fail case I can think of), then the Flag is 'M' and the data is just
%%% repeated from the previous good measurement to keep the program
%%% running.
else Baseline2 = Baseline; Flag2 = 'M'; AccGauge2 = AccGauge; RawGauge2 = RawGauge; CorrDiff2 = Diff2; Recent_Diffs = Recent_Diffs + Diff2;
end
%%% The second record reflects these criteria
SecondRecord = {DN2, HumanDate2, AccGauge2, str2num(RawGauge2), Baseline2, Diff2, CorrDiff2, Flag2};
%%% Both the second and first record are given to the results.
Results(1,:) = FirstRecord;
Results(2,:) = SecondRecord;
%%% holds output for midnight to midnight
M2M = zeros(2000,4);
%%% holds the output for sums from m to m
Daily_Acc = zeros(2000,2);
%%% Trackers
M2Mcount = 0; % start count for midnight to midnight at 0;
Count = 0; % count days during Raining state that are not very different from the prior day
MaybeDry = 0; % only use the counter for the not different days when the MaybeDry flag is on
Bomb = zeros(2000,1);
%%% ENTER LOOP ASSUMING RAINY SEASON
for i = 3:length(Results);
%%% The third line is parsed
ThirdLine = regexp(d{i},',','split');
%%% Raw measurement is taken from the third line
RawGauge3 = ThirdLine{1,raincol};
%%% Comparative measurements are taken from the Results array,
%%% so they are all numeric (see else clause below!)
CompareRaw = Results{i-1,4}; CompareAcc = Results{i-1,3};
CompareBase = Results{i-1,5}; CompareDiff = Results{i-1,6}; CompCorr = Results{i-1,7};
%%% The third DN comes from the raw data
DN3 = str2num(ThirdLine{1,dncol});
HumanDate3 = ThirdLine{1,humancol};
%%% calculate the datevec for midnight to midnight
vectordate(i,:) = datevec(DN3);
%%% if it is the 0 hour
if vectordate(i,4) == 0;
%%% and if it is also the 0th minute
if round(vectordate(i,5)/5)*5 == 0 | round(vectordate(i,5)/5)*5 == 60;
%%% This part appears to work now
%%% catch the RawGauge Measurement
catchhour = str2num(RawGauge3);
%%% Add one to the midnight to midnight count
M2Mcount = M2Mcount + 1;
%%% set the midnight to midnight count's first value to the datenum
M2M(M2Mcount,1) = DN3;
%%% set the midnight to midnight count's second value to the caught raw value
M2M(M2Mcount,2) = catchhour;
%%% record the index number for acc total
M2M(M2Mcount,4) = i;
%%% if there are more than 2 values there,
if M2Mcount >= 2;
%%% calculate the daily midnight to midnight difference and put it in M2M
M2M(M2Mcount,3) = M2M(M2Mcount,2) - M2M(M2Mcount-1,2);
end
end
end
%%% The difference is the difference between the raw measurement
%%% and the running "baseline" which is not the Same thing as the raw
%%% or the accumulated (represents the 'state of the gauge' regardless of loss
%%% The actual change in this interval is the change between the raw gauge and the baseline
Diff3 = str2num(RawGauge3) - CompareBase; % the Differences are comparisons of the Raw and the Base
%%% when it is RAINING!
if NotRaining == 0;
%% figure out what the measurement of recent differences was before went into the loop
PriorRD = Recent_Diffs; % catch the previous measurements's Recent Diff
%%% if The maybe dry flag is on and the recent diffs times a factor
if MaybeDry == 1;
% if the Recent differences are within a 1 and a half mm range of the current differences
% add to the count that it could be dry
if Recent_Diffs - 1.2 <= PriorRD | PriorRD <= Recent_Diffs + 1.2;
Count = Count+1;
else Count = 0;
end
%%% If it's been more than about 6 days of no rain difference, trigger the change for the next measurement
if Count > 1500;
NotRaining = 1;
MaybeDry = 0;
Recent_Diffs = 0;
Count = 0;
else NotRaining = 0;
end
end
%% if the difference is positive or 0 it's probably real in the rainy season
if Diff3 >= 0;
Baseline3 = CompareBase + Diff3; % baseline accumulates rain
if Diff3 < 4;
Flag3 = 'A'; % a positive change in summmer is probably a non event
else Flag3 = 'Bomb';
end
AccGauge3 = CompareAcc + Diff3; % accumulation occurs
CorrDiff3 = Diff3; % difference is correct
Recent_Diffs = Recent_Diffs + Diff3; % recent differences increases
%%% if a very large drop occurs then the gauge has been reset
elseif Diff3 < 0 & abs(Diff3)/10 >= abs(str2num(RawGauge3)- CompareRaw)+0.1;
Baseline3 = CompareRaw; % the baseline is reset
Flag3 = 'R'; % the flag is R
CorrDiff3 = 0; % there isn't a real difference
AccGauge3 = CompareAcc; % the accumulation is the SHme
%%% recent diffs does not change
%%% if the difference is less than 0 and the log of this difference is
%%% less than the abs of the comparative difference plus a small factor
%%% for the case of 0, then it's just regular old evaporation, and gets
%%% a 'Q'
elseif Diff3 < 0 & abs(Diff3)/10 < abs(str2num(RawGauge3)- CompareRaw) + 0.1;
Baseline3 = CompareBase; % the baseline does not change
Flag3 = 'Ae'; % the difference is probably evaporation
CorrDiff3 = 0; % there isn't a corrected difference
AccGauge3 = CompareAcc; % the accumulation does not change
% Recent_Diffs = Recent_Diffs + Diff3 % counts the small flux, in case there is a leak.
elseif CompareRaw == NaN && str2num(RawGauge3) ~= NaN;
Baseline3 = CompareRaw;
Flag3 = 'M';
AccGauge3 = CompareAcc;
RawGauge3 = num2str(CompareRaw);
%%% FOR WHEN NOTHING HAPPENS LIKE NAN
else Baseline3 = CompareBase; Flag3 = 'M'; AccGauge3 = CompareAcc; RawGauge3 = num2str(CompareRaw);
end
%% no negative fluxes are counted in the rainy time's recent diffs, so we can check to see how long it
%% stays the SHme for. If the recent diffs are the SHme as the previous diffs and MaybeDry is not initialized
%% initialize it and start counting
if Recent_Diffs == PriorRD & MaybeDry == 0;
MaybeDry = 1;
Count = Count + 1
end
%%% The baseline can stay the Same as it is in this case since it's probably not draining?
ThirdRecord = {DN3, HumanDate3, AccGauge3, str2num(RawGauge3), Baseline3, Diff3, CorrDiff3, Flag3};
Results(i,:) = ThirdRecord;
clearvars ThirdRecord
elseif NotRaining == 1;
%%% similar to above, if the Diff > 0 or = 0 then the base and
%%% accumulation reflect this change and it is accepted
if Diff3 >= 0
Baseline3 = CompareBase; % a positive change in summer does not change the baseline
if Diff3 < 4;
Flag3 = 'Ae'; % a positive change in summmer is probably a non event
else Flag3 = 'Bomb';
end
AccGauge3 = CompareAcc + Diff3;
CorrDiff3 = 0;
Recent_Diffs = Recent_Diffs + Diff3; % the recent diffs counts this small flux
%%% if a very large drop occurs then the gauge has been reset
elseif Diff3 < 0 & abs(Diff3)/10 >= abs(str2num(RawGauge3)- CompareRaw)+0.1;
Baseline3 = CompareRaw; % the baseline is reset
Flag3 = 'R'; % the flag is R
CorrDiff3 = 0; % there isn't a real difference
AccGauge3 = CompareAcc; % the accumulation is the SHme
%%% Recent diffs ignores the unreal flux
%%% a small change in the summer is probbaly a non-event
elseif Diff3 < 0 & abs(Diff3)/10 < abs(str2num(RawGauge3)- CompareRaw) + 0.1;
Baseline3 = CompareBase; % the baseline does not change
Flag3 = 'Ae'; % the difference is probably evaporation
CorrDiff3 = 0; % there isn't a corrected difference
AccGauge3 = CompareAcc; % the accumulation does not change
elseif CompareRaw == NaN && str2num(RawGauge3) ~= NaN;
Baseline3 = CompareRaw;
Flag3 = 'M';
AccGauge3 = CompareAcc;
RawGauge3 = num2str(CompareRaw);
else Baseline3 = CompareBase; Flag3 = 'M'; AccGauge3 = CompareAcc; RawGauge3 = num2str(CompareRaw);
end % end the loop of simmer conditions
%%% if the recent_diffs is more than 2, move into winter mode.
if Recent_Diffs > 2;
NotRaining = 0;
Baseline3 = str2num(RawGauge3); % reset the baseline
MaybeDry = 0;
Flag = 'Rw'; % flag for winter mode switch
Recent_Diffs = CorrDiff3; % reset the difference counter
end
ThirdRecord = {DN3, HumanDate3, AccGauge3, str2num(RawGauge3), Baseline3, Diff3, CorrDiff3, Flag3};
Results(i,:) = ThirdRecord;
clearvars ThirdRecord
end
end
fprintf(1,'%s\n','Hey, its done');
%filename2 = 'Rawresults_UPLMET_SH_5min_09042014.csv';
fid1 = fopen(fileout_1,'w');
fprintf(fid1,'%s,%s,%s,%s,%s,%s,%s\n','GAUGENAME','DATENUM','RAWGAUGE','RAWDIFF','ADJDIFF','FLAG');
for i = 1:length(Results)
fprintf(fid1,'%s,%s, %4.2f, %4.2f, %4.2f, %s\n', 'UPLMET_SH',Results{i,2},Results{i,4}, Results{i,6}, Results{i,7}, Results{i,8});
end
try
numerical_diff = cell2mat(Results(:,7));
filename = fileout_2;
fid = fopen(filename,'w');
fprintf(fid,'%s,%s\n','DATE','SUM RESULTS')
value = zeros(819,1)
somenum = M2M(:,1);
for j = 1:length(somenum)-1;
value(j) = sum(numerical_diff(M2M(j,4):M2M(j+1,4)));
fprintf(fid,'%d,%4.2f\n',joe(j+1),value(j));
end
fclose(fid)
catch ME
end
filename1 = fileout_3;
fid2 = fopen(filename1,'w');
fprintf(fid2,'%s,%s,%s,%s,%s\n','GaugeName','DATE','algorithm_precip','m2m_precip','index_in_raw');
ds = datestr(M2M(1:819,1));
ds = cellstr(ds);
for i = 1:819;
fprintf(fid2,'%s,%s,%4.2f,%4.2f,%d\n','UPLMET_SH',ds{i},value(i),M2M(i+1,3),M2M(i,4));
end
fclose(fid2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment