-
-
Save YanCheng-go/d4e17831f294199443d0f7682558e608 to your computer and use it in GitHub Desktop.
Extract phenological metrics after fitting a double hyperbolic tangent model
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Retrieving phenological metrics from NDVI time series after fitting a double hyperbolic tangent model | |
----------------------------------------------------------------------------------------------------- | |
Yan Cheng | |
chengyan2017@gmail.com | |
20/02/2019 | |
Enschede, NL | |
------------ | |
The important functions include: | |
1. data_initialize(basedate_, ndvi, dateRange) # clean data | |
2. upper_env_fit(X,X_,Y,p,imax,func, plot = None) # fit curve | |
3. phenoList(t,Y,t_, finalY, basedate_) # extract phenological metrics | |
The implementation of those functions for a specific objective: | |
# extract phenological metrics for one pixel | |
1. pheno_calc(nc_file, y, x, basedate_, dateRange, n_params, ds_QA = None, ds_DOY = None, fit = None, plot = None) | |
# extract phenological metrics for entire image | |
2. pheno_map(y_start, y_end, x_start, x_end, basedate_, dateRange, n_params, nc_file, work_dir, ds_QA = None, ds_DOY = None, | |
save_arr = None, parameters = None, plot = None, save_plot = None) | |
Other functions: | |
# for visualization | |
draw_map(map_arr, work_dir, output_dir, file_name, extension, satellite, color_scheme = None, save_plot = None) | |
""" | |
import pandas as pd | |
import numpy as np | |
import xarray as xr | |
import netCDF4 | |
from netCDF4 import Dataset | |
from datetime import timedelta as timedelta | |
from datetime import datetime | |
from datetime import timedelta as td | |
import jdcal | |
from scipy.optimize import leastsq | |
import lmfit | |
from lmfit import Model | |
from lmfit import Parameters | |
from scipy.integrate import simps | |
from scipy.stats import pearsonr | |
from sklearn.metrics import mean_squared_error | |
from math import sqrt | |
from multiprocessing import Pool | |
import pickle | |
import itertools | |
from tqdm import tqdm_notebook | |
import os.path | |
import warnings | |
import matplotlib.pyplot as plt | |
import matplotlib.cm | |
import matplotlib.patches as mpatches | |
from matplotlib.lines import Line2D | |
%matplotlib inline | |
plt.rcParams['figure.figsize'] = [7, 5] | |
def data_initialize(basedate_, ndvi, dateRange): | |
""" | |
USAGE | |
----------- | |
- clean data | |
- initialize parameter | |
ARGUMENTS: type, example, description | |
--------- | |
basedate_: string, "20170101" | |
the date to be defined as 0 in the date list | |
ndvi: pd.DataFrame | |
NDVI time series, including date and NDVI | |
dateRange: list, ["20170301","20171001"] | |
the time window of phenology analysis (single season) | |
""" | |
# data clean | |
# remove duplicates | |
ndvi = ndvi.sort_values('NDVI', ascending=False).drop_duplicates('Date') | |
ndvi = ndvi.sort_values('Date') | |
ndvi = ndvi.reset_index(drop = True) | |
ndvi.columns = ['Date','NDVI'] | |
# convert julia date | |
d = [] | |
for i in ndvi['Date']: | |
dtime = int(sum(jdcal.gcal2jd(i.year, i.month, i.day))+1) | |
d.append(dtime) | |
# print(d) | |
df = pd.concat([pd.DataFrame(d),ndvi['NDVI']],axis=1) | |
df.columns = ['DOY','NDVI'] | |
# remove negtive NDVI | |
df.loc[df['NDVI']<=0, 'NDVI'] = np.nan | |
df = df.dropna() | |
df = df.reset_index(drop = True) | |
# set constrains | |
min95_5perc = 0 # minimum difference between 95th and 5th percentile of valid NDVI data to start processing | |
# (0.20 for Schier) | |
minMedian = 0.15 # minimum median NDVI value to start processing (avoid processing in sea) | |
# (0.20 for Schiermonnikoog, 0.15 trial for Kenya) | |
# check if first or last index value is strange and remove (strange = smaller than 5th percentile minus 0.1 NDVI) | |
# --> this can result in very weird fits | |
percNDVI = np.percentile(np.array(df['NDVI']), [5,50,95]) | |
# print(percNDVI) | |
if df.iloc[0,1] <= percNDVI[0]-0.1: | |
df.iloc[0,1] = np.nan | |
if df.iloc[-1,1] <= percNDVI[0]-0.1: | |
df.iloc[-1,1] = np.nan | |
df = df.dropna() | |
percNDVI = np.percentile(np.array(df['NDVI']), [5,50,95]) # recalculate 5th, 50th and 95th percentile in NDVI array | |
# convert string to date | |
basedate = datetime.strptime(basedate_,'%Y%m%d') | |
date1 = datetime.strptime(dateRange[0],'%Y%m%d') | |
date2 = datetime.strptime(dateRange[1],'%Y%m%d') | |
# covert date to julia date | |
basedate_j = int(sum(jdcal.gcal2jd(basedate.year, basedate.month, basedate.day))+1) | |
date1_j = int(sum(jdcal.gcal2jd(date1.year, date1.month, date1.day))+1) | |
date2_j = int(sum(jdcal.gcal2jd(date2.year, date2.month, date2.day))+1) | |
DOY1 = date1_j | |
DOY2 = date2_j | |
# calculate the julia date for midterm | |
midDOY = int(DOY1 + (DOY2-DOY1)/2) | |
# print(midDOY) | |
if df.shape[0] >= 4: | |
if percNDVI[2]-percNDVI[0] >= min95_5perc and percNDVI[1] >= minMedian and df.iloc[3,0]<midDOY and df.iloc[-4,0]> midDOY: | |
t_ = range(int(DOY1),(int(DOY2)+1)) # list of date | |
df_ = df.loc[(df['DOY']<=DOY2) & (df['DOY']>=DOY1) , ['NDVI','DOY']] # use the time series within the time frame | |
# of the study | |
df_ = df_.sort_values('DOY') | |
t = df_['DOY'].tolist() # original date list | |
NDVIseries =df_['NDVI'].tolist() # original NDVI list | |
indT1 = [df['DOY'].tolist().index(x) for x in t if x <= midDOY] # all observations in the green-up phase | |
indT2 = [df['DOY'].tolist().index(x) for x in t if x > midDOY] # all observations in the senesence phase | |
P = [-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0] | |
if indT1 != [] and indT2 != []: | |
NDVIseries1st = df.iloc[indT1, 1].tolist() # NDVI list in the green-up phase | |
NDVIseries2nd = df.iloc[indT2, 1].tolist() # NDVI list in the senesence phase | |
t1st = df.iloc[indT1, 0].tolist() # date list in the green-up phase | |
t2st = df.iloc[indT2, 0].tolist() # date list in the senesence phase | |
minNDVI1st = min(NDVIseries1st) # minimum NDVI value in the green-up phase | |
minNDVI2nd = min(NDVIseries2nd) # minimum NDVI value in the sensence phase | |
indMin1 = NDVIseries1st.index(minNDVI1st) | |
indMin2 = NDVIseries2nd.index(minNDVI2nd) | |
maxNDVI = max(NDVIseries) | |
# print(maxNDVI) | |
indMax = NDVIseries.index(maxNDVI) | |
# print(indMax) | |
# initialize parameters in the hyperbolic tangent model | |
P[0] = minNDVI1st # minimum value in first half series, change to: min(NDVIseries) to overall minimum? | |
P[1] = maxNDVI - minNDVI1st # difference between observed maximum and minimum value in first half series | |
P[2] = int(DOY1+(t[indMax]-DOY1)/2) # midpoint between start of optimization window and timing observed maximum | |
P[3] = 0.02 # for now fixed value: not sure how to get this better | |
P[4] = maxNDVI - minNDVI2nd # difference between observed maximum and minimum value in second half series | |
P[5] = int(t[indMax]+(DOY2-t[indMax])/2) # midpoint between timing observed maximum and end of optimization window | |
P[6] = -0.02 # for now fixed value: not sure how to get this better | |
else: | |
NDVIseries, t, t_, P = [None]*4 | |
else: | |
NDVIseries, t, t_, P = [None]*4 | |
return NDVIseries, t, t_, P | |
# user defined model | |
def tanh_Double(x, a, b, c, d, e, f, g): | |
return a + b * (np.tanh((x - c) * d) + 1) / 2.0 + e * (np.tanh((x - f) * g) + 1) / 2.0 - e | |
# Upper-envelope fitting algorithm | |
def upper_env_fit(X,X_,Y,p,imax,func, plot = None): | |
""" | |
USAGE | |
----------- | |
Find the upper envelop of user defined model | |
ARGUMENTS: type, example, description | |
--------- | |
X: list | |
list of julia date for original date | |
X_: list | |
list of julia date for entire time frame (interval: 1 day) | |
Y: list | |
list of NDVI values for original date | |
imax: numeric, 10 | |
maximum number of iterations | |
func: function | |
user defined model | |
plot (optional): boolean | |
expert figures or not | |
""" | |
a,b,c,d,e,f,g = p | |
params = Parameters() | |
params.add_many(('a', a, True, a/2, None, None, None), | |
('b', b, True, 0, 1.25*b, None, None), | |
('c', c, True, None, None, None, None), | |
('d', d, True, 0, 0.1, None, None), | |
('e', e, True, 0, 1.25*e, None, None), | |
('f', f, True, None, None, None, None), | |
('g', g, True, -0.10, -0.0000001, None, None)) | |
gmodel = Model(func) | |
weights = [1]*len(X) | |
result = gmodel.fit(Y,params,x=X,weights = weights, method = 'leastsq') | |
params = result.best_values | |
predY = result.best_fit | |
lp = tuple([params['a'], params['b'], params['c'], params['d'], params['e'], params['f'], params['g']]) | |
lpredY = predY | |
# print(p) | |
lp0 = lp | |
if plot is True: | |
plt.plot(X, Y, 'o',label='original') | |
plt.plot(X_, func(X_, *lp), 'b--',label='fit') | |
plt.show() | |
resu = [] | |
dif = [] | |
for i, pY in enumerate(predY): | |
dif.append(abs(Y[i]-pY)) | |
resu.append(Y[i]-pY) | |
mm = max(dif) | |
# print(mm) | |
if mm != 0: | |
weights = [1]*len(dif) | |
index = [i for i in range(len(resu)) if resu[i] <= 0] | |
if len(index) != 0: | |
for i in index: | |
weights[i] = 1-dif[i]/mm | |
weights0=weights | |
gdis = sum([abs(dif[i]*weights[i]) for i in range(len(resu))]) | |
ormax = gdis | |
for it in range(imax): | |
# print(it) | |
if gdis <= ormax: | |
a,b,c,d,e,f,g = p | |
params = Parameters() | |
##2019-01-14 reset the limitations of a | |
params.add_many(('a', a, True, a/2, None, None, None), | |
('b', b, True, 0, 1.25*b, None, None), | |
('c', c, True, None, None, None, None), | |
('d', d, True, 0, 0.1, None, None), | |
('e', e, True, 0, 1.25*e, None, None), | |
('f', f, True, None, None, None, None), | |
('g', g, True, -0.10, -0.0000001, None, None)) | |
gmodel = Model(func) | |
result = gmodel.fit(Y,params,x=X, weights = weights, method = 'leastsq') | |
params = result.best_values | |
predY = result.best_fit | |
lp = tuple([params['a'], params['b'], params['c'], params['d'], params['e'], params['f'], params['g']]) | |
lpredY = predY | |
resu = [] | |
dif = [] | |
for i, pY in enumerate(predY): | |
resu.append(Y[i]-pY) | |
dif.append(abs(Y[i]-pY)) | |
mm = max(dif) | |
weights = [1]*len(dif) | |
index = [i for i in range(len(resu)) if resu[i] <= 0] | |
if len(index) != 0: | |
for i in index: | |
weights[i] = 1-dif[i]/mm | |
ormax = gdis | |
gdis = sum([abs(resu[i]*weights0[i]) for i in range(len(resu))]) | |
if plot is True: | |
plt.plot(X, Y, 'o',label='original') | |
plt.plot(X_, func(X_, *lp), 'b--',label='fit') | |
plt.show() | |
n_it = it+2 | |
# if the last fit is not correct, then use the second last fit | |
diffff = gdis-ormax | |
if gdis>ormax and diffff>0.5: | |
lp = lp0 | |
# print(lp) | |
n_it = n_it-1 | |
# print(n_it) | |
break | |
else: | |
lp0 = lp | |
# print(lp0) | |
if plot is True: | |
plt.plot(X, Y, 'o',label='original') | |
plt.plot(X_, func(X_, *lp), 'b--',label='fit') | |
plt.show() | |
# print('The number of iterations is: ' + str(n_it+1)) | |
# print('\nThe optimal parameters are: ' + str(lp)) | |
# print('\nThe fit report is: \n' + result.fit_report()) | |
Y_ = func(X_,*lp) | |
# print(lp) | |
return lp,Y_, n_it | |
def phenoList(t,Y,t_, finalY, basedate_): | |
""" | |
USAGE | |
----------- | |
Extract phenological metrics from fitted curves | |
ARGUMENTS: type, example, description | |
--------- | |
t: list | |
list of julia date for original date | |
t_: list | |
list of julia date for the entire time frame (interval: 1 day) | |
Y: list | |
list of NDVI values for original date | |
finalY: list | |
Xlist of predicted NDVI values for the entire time frame (interval: 1 day) | |
basedate_: string, '20170101' | |
the date to be defined as 0 in the date list | |
""" | |
basedate = datetime.strptime(basedate_,'%Y%m%d') | |
basedate_j = int(sum(jdcal.gcal2jd(basedate.year, basedate.month, basedate.day))+1) | |
t_ = np.array([i-basedate_j for i in t_]) | |
DOY_start = int(t_[0]) | |
DOY_end = int(t_[-1]) | |
NDVI_start = finalY[0] | |
NDVI_end = finalY[-1] | |
df = pd.DataFrame([t_,finalY]).T | |
df.columns = ['DOY', 'NDVI'] | |
df_orginal = pd.DataFrame([np.array(t),np.array(Y)]).T | |
df_orginal.columns = ['DOY', 'NDVI'] | |
df_orginal['Gap'] = df_orginal.DOY.sub(df_orginal.DOY.shift()) | |
df.set_index('DOY', inplace=True) | |
# print(df) | |
peak_DOY = int(df.NDVI.idxmax()) # DOY of the peak season | |
NDVI_upward = df.NDVI[DOY_start:peak_DOY] | |
NDVI_downward = df.NDVI[peak_DOY:DOY_end+1] | |
maxNDVI = df.NDVI.max() # maximum NDVI | |
AMP1 = maxNDVI - NDVI_upward.min() | |
AMP2 = maxNDVI - NDVI_downward.min() | |
def diff(maxNDVI, minNDVI, percentage, direction): | |
threshold = (maxNDVI - minNDVI) * percentage + minNDVI | |
diff = [i - threshold for i in direction] | |
return diff | |
diff_upward_20 = diff(maxNDVI, NDVI_upward.min(), 0.2, NDVI_upward) | |
diff_downward_20 = diff(maxNDVI, NDVI_downward.min(), 0.2, NDVI_downward) | |
SOS20 = t_[sum(i < 0 for i in diff_upward_20)] | |
EOS20 = sum(i > 0 for i in diff_downward_20)+peak_DOY-1 | |
diff_upward_50 = diff(maxNDVI, NDVI_upward.min(), 0.5, NDVI_upward) | |
diff_downward_50 = diff(maxNDVI, NDVI_downward.min(), 0.5, NDVI_downward) | |
SOS50 = t_[sum(i < 0 for i in diff_upward_50)] | |
EOS50 = sum(i > 0 for i in diff_downward_50)+peak_DOY-1 | |
diff_upward_90 = diff(maxNDVI, NDVI_start, 0.9, NDVI_upward) | |
PS90 = t_[sum(i < 0 for i in diff_upward_90)] | |
nImages = len(df_orginal) | |
maxGap = int(df_orginal.Gap.max()) | |
if EOS20 > SOS20: | |
LGS20 = EOS20-SOS20 | |
else: | |
LGS20 = -9998 | |
if EOS50 > SOS50: | |
LGS50 = EOS50-SOS50 | |
else: | |
LGS50 = -9998 | |
warnings.filterwarnings('error') | |
try: | |
cumNDVI20 = simps(df.NDVI[SOS20:EOS20], dx=1) | |
except: | |
cumNDVI20 = -9998 | |
try: | |
cumNDVI50 = simps(df.NDVI[SOS50:EOS50], dx=1) | |
except: | |
cumNDVI50 = -9998 | |
t = np.array([i-basedate_j for i in t]) | |
# print(t) | |
try: | |
PearsonCC = pearsonr(df.NDVI[t], Y) | |
except: | |
PearsonCC = (-9998,-9998) | |
PearsonCC1 = PearsonCC[0] | |
PearsonCC2 = PearsonCC[1] | |
try: | |
RMSD = sqrt(mean_squared_error(Y, df.NDVI[t])) | |
except: | |
RMSD = -9998 | |
try: | |
MSD = np.mean(np.array(pd.DataFrame(Y)) - np.array(pd.DataFrame(df.NDVI[t]))) ## updated | |
# print(df.NDVI[t]) | |
except: | |
MSD = -9998 | |
phenoList_ = [SOS20, SOS50, PS90, peak_DOY, EOS20, EOS50, LGS20, LGS50, AMP1, AMP2, maxNDVI, cumNDVI20, cumNDVI50, | |
PearsonCC1,PearsonCC2, RMSD, MSD,nImages, maxGap] | |
# print(phenoList_) | |
return phenoList_ | |
def pheno_calc(nc_file, y, x, basedate_, dateRange, n_params, ds_cloudMask, fit = None, plot = None): | |
""" | |
USAGE | |
----------- | |
Retrieve phenological metrics for individual pixel in a image series | |
ARGUMENTS: type, example, description | |
--------- | |
nc_file: string, r'F:\Kapiti\PlanetScope\test\NDVI_cloudMask_Oct_netCDF_S1_3.nc' | |
file path of netCDF data | |
y: numeric, 0 | |
row number in the raster, >=0 | |
x: numeric, 0 | |
column number in the raster, >=0 | |
basedate_: string, '20170101' | |
the date to be defined as 0 in the date list | |
dateRange: list, ["20170301","20171001"] | |
the time window of phenology analysis (single season) | |
n_params: numeric, 7 | |
number of parameters in user defined model | |
ds_cloudMask (optional): boolean | |
use isolated quality layer and DOY layer or not (for MODIS) | |
fit (optional): string, 'upper_env_fit' | |
specific the curve fitting method | |
plot (optional): boolean | |
export figures or not | |
""" | |
# specifiy the curve fitting algorithm | |
if fit is None: | |
fit = 'upper_env_fit' # fit upper envelop of the user defined model | |
ds_NDVI = xr.open_dataset(nc_file) | |
# extract NDVI time series for a pixel (column number x, row number y) | |
dsloc = ds_NDVI.sel(x=x, y=y, method='nearest') | |
dsloc = dsloc.where(dsloc > 0) # remove negative values | |
NDVI = np.array(dsloc['NDVI']) # convert to np.array | |
Date_ = np.array(dsloc['time']) | |
# extract quality time series for a pixel (column number x, row number y) | |
if ds_cloudMask is True: | |
dsloc2 = ds_NDVI.sel(x=x, y=y, method='nearest') | |
# for MODIS | |
class_ = np.array(dsloc2['QA']) | |
dsloc2_bad = list(np.where((class_ == 3002)|(class_ == 3003))[0]) | |
# set bad observations as NAN | |
def setNan(i): | |
NDVI[i]=np.nan | |
list(map(setNan,dsloc2_bad)) | |
# remove bad observations from NDVI time series | |
df_nan = pd.DataFrame(NDVI).dropna() | |
if not df_nan.empty: | |
# for MODIS | |
# convert MODIS DOY layer to normal format of date | |
if ds_cloudMask is True: | |
Date_ = [] | |
for idx, item in enumerate(np.array(dsloc['DOY'])): | |
# print(item) | |
if idx<=37: | |
Date_.append(datetime(2017, 1, 1) + timedelta(item - 1)) | |
else: | |
Date_.append(datetime(2018, 1, 1) + timedelta(item - 1)) | |
Date_ = np.array(Date_) | |
# print(Date_) | |
# NDVI time series | |
ndvi = pd.concat([pd.DataFrame(Date_), pd.DataFrame(NDVI)], axis = 1) | |
ndvi.columns = ['Date','NDVI'] | |
# data initialization | |
Y, t, t_, P = data_initialize(basedate_, ndvi, dateRange) | |
# check if data is validated | |
if Y is None: | |
keys = ['SOS20', 'SOS50', 'PS90', 'PS', 'EOS20', 'EOS50', 'LGS20', 'LGS50', 'AMP1', 'AMP2', 'maxNDVI', 'cumNDVI20', | |
'cumNDVI50', 'PearsonCC1', 'PearsonCC2','RMSD', 'MSD', 'nImages', 'maxGap', 'nIterations'] | |
# phenoList_ = {key: -9999 for key in keys} | |
phenoList_ = [-9998]*len(keys) | |
if plot is True: | |
print(phenoList_) | |
else: | |
n_obs = len(Y) | |
# curve fitting can be executed only when the number of observaitons are more than the number of parameters | |
if n_obs > n_params: | |
n_it = None # initialize the number of iterations | |
# fit upper envelop of the double hyperbolic tangent model | |
if fit == 'upper_env_fit': | |
lp,Y_,n_it = upper_env_fit(X=t,X_=t_,Y=Y,p=P,imax=10,func=tanh_Double) | |
# if fit == 'non_weighted_fit': | |
# lp,Y_ = non_weighted_fit(X=t,X_=t_,Y=Y,p=P,func=tanh_Double) | |
# if fit == 'upper_env_fit2': | |
# lp,Y_ = upper_env_fit2(X=t,X_=t_,Y=Y,p=P,imax=10,func=tanh_Double) | |
# extract phenological metrics from fitted curves | |
phenoList_ = phenoList(t,Y,t_, Y_, basedate_) | |
if n_it is not None: | |
phenoList_.append(n_it) | |
if plot is True: | |
print(phenoList_) | |
color_list = ['#1b9e77', '#d95f02','#1b9e77'] | |
color = color_list[2] | |
# fig = plt.figure() | |
fig,ax1 = plt.subplots(figsize = (12,8)) | |
gd = [julian.from_jd(i, fmt='jd') for i in t] | |
# print(gd) | |
gd_ = [julian.from_jd(i, fmt='jd') for i in t_] | |
plt.plot(gd,Y,color = color , marker = 'o', linewidth = 0) | |
plt.plot(gd_,Y_, color = color,linewidth = 4) | |
x1 = datetime(2017, 1, 1,12,0,0) + timedelta(int(phenoList_[1])) | |
x2 = datetime(2017, 1, 1,12,0,0) + timedelta(int(phenoList_[5])) | |
plt.plot([x1, x1],[0,0.1], color = color, linewidth = 4) | |
plt.plot([x2, x2],[0,0.1], color = color, linewidth = 4) | |
date_fill = [x1 + timedelta(days=x) for x in range(0, (x2-x1).days+1)] | |
df = pd.DataFrame({'Date':gd_, 'NDVI': Y_}) | |
df_fill = df.loc[df['Date'].isin(date_fill)] | |
plt.fill_between(date_fill,np.array(df_fill['NDVI']), alpha = 0.2, color = '#1b9e77') | |
# plt.text(x, y, s, fontsize=12) | |
plt.ylim(0) | |
plt.xticks(fontsize = 15, rotation = 0) | |
plt.yticks(fontsize = 15) | |
# ax1.set_ylim(0.3,0.6) | |
# plt.setp(ax1.get_yticklabels()[-1], visible=False) | |
# plt.setp(ax1.get_yticklabels()[0], visible=False) | |
ax1.tick_params(axis="x",direction="in", which = 'major', length = 10) | |
ax1.tick_params(axis="x",direction="in", which = 'minor', length = 5) | |
ax1.tick_params(axis="y",direction="in", which = 'major', length = 10) | |
# ax1.xaxis.set_tick_params(labelsize = 20, rotation = 0, which = 'both') | |
# ax1.yaxis.set_tick_params(labelsize = 20) | |
# ax1.set_ylabel('Camera-derived GCC', fontsize = 20) | |
legend_elements = [Line2D([0], [0], color=color_list[0], lw=4, label='Location A'), | |
Line2D([0], [0], color=color_list[1], lw=4, label='Location A (modified)'), | |
Line2D([0], [0], color=color_list[2], lw=4, label='Location B')] | |
# red_patch = mpatches.Patch(edgecolor=color[0],fill = False, linewidth = 2,label='Location A') | |
# blue_patch = mpatches.Patch(edgecolor=color[1], fill = False,linewidth = 2,label='Location A (modified)') | |
# yellow_patch = mpatches.Patch(edgecolor=color[2], fill = False,linewidth = 2, label='Location B') | |
# L1 = plt.plot([0],[0],color = color[0],label = 'Location A') | |
# L2 = plt.plot([0],[0],color = color[1],label = 'Location A (modified)') | |
# L3 = plt.plot([0],[0],color = color[2],label = 'Location B') | |
# plt.legend(handles=[L1,L2,L3], fontsize = 20) | |
# plt.legend() | |
# plt.legend(handles=[red_patch, blue_patch,yellow_patch], fontsize = 15) | |
# plt.legend(handles=legend_elements, fontsize = 15,loc = [1.5,0.5]) | |
plt.ylabel('NDVI', fontsize = 15) | |
plt.xlabel('Date (Year-Month)', fontsize = 15) | |
plt.show() | |
else: | |
keys = ['SOS20', 'SOS50', 'PS90', 'PS', 'EOS20', 'EOS50', 'LGS20', 'LGS50', 'AMP1', 'AMP2', 'maxNDVI', | |
'cumNDVI20', 'cumNDVI50', 'PearsonCC1', 'PearsonCC2','RMSD', 'MSD', 'nImages', 'maxGap', 'nIterations'] | |
# phenoList_ = {key: -9999 for key in keys} | |
phenoList_ = [-9998]*len(keys) | |
else: | |
keys = ['SOS20', 'SOS50', 'PS90', 'PS', 'EOS20', 'EOS50', 'LGS20', 'LGS50', 'AMP1', 'AMP2', 'maxNDVI', 'cumNDVI20', | |
'cumNDVI50', 'PearsonCC1', 'PearsonCC2','RMSD', 'MSD', 'nImages', 'maxGap', 'nIterations'] | |
# phenoList_ = {key: -9999 for key in keys} | |
phenoList_ = [-9999]*len(keys) | |
return phenoList_ | |
def pheno_map(y_start, y_end, x_start, x_end, basedate_, dateRange, n_params, nc_file, work_dir, ds_cloudMask, | |
save_arr = None, parameters = None, plot = None, save_plot = None): | |
""" | |
USAGE | |
----------- | |
Retrieve phenological metrics for specific area in the image | |
ARGUMENTS: type, example, description | |
--------- | |
y_start, y_end, x_start, x_end: numeric, 0, 100, 0, 100 | |
specify an area in the image | |
basedate_: string, '20170101' | |
the date to be defined as 0 in the date list | |
dateRange: list, ["20170301","20171001"] | |
the time window of phenology analysis (single season) | |
n_params: numeric, 7 | |
the number of parameters in the user defined model | |
nc_file: string, r'D:\Kapiti\netCDF\NDVI_cloudMask_netCDF_S1.nc' | |
file path of netCDF data | |
work_dir: string, r'D:\Kapiti' | |
file path of work directory | |
ds_cloudMask (optional): boolean | |
use isolated quality layer and DOY layer or not (for MODIS) | |
fit (optional): string, 'upper_env_fit' | |
specific the curve fitting method | |
save_arr (optional): boolean | |
save phenology map (array) or not | |
parameters (optional): string, 'SOS50' | |
specify a phenological metric | |
plot (optional): boolean | |
export figures or not | |
save_plot (optional): boolean | |
save phenology map (figure) or not | |
""" | |
if parameters is None: | |
parameters_select = parameters | |
y_range = range(y_start,y_end) | |
n_y = y_end-y_start | |
x_range = range(x_start,x_end) | |
n_x = x_end-x_start | |
keys = ['SOS20', 'SOS50', 'PS90', 'PS', 'EOS20', 'EOS50', 'LGS20', 'LGS50', 'AMP1', 'AMP2', 'maxNDVI', 'cumNDVI20', | |
'cumNDVI50', 'PearsonCC1', 'PearsonCC2','RMSD', 'MSD', 'nImages', 'maxGap', 'nIterations'] | |
var = len(keys) | |
# ds_NDVI = xr.open_dataset(nc_file) | |
coordinate_list = list(itertools.product(y_range, x_range)) | |
map_arr = np.zeros((n_y,n_x,var)) | |
if parameters is None: | |
def fun(coordinate): | |
y,x = coordinate | |
i = y-y_start | |
j = x-x_start | |
# map_arr[i][j][:] = list(pheno_calc(ds_NDVI, y, x, basedate_, dateRange).values()) | |
map_arr[i][j] = pheno_calc(nc_file, y, x, basedate_, dateRange, n_params, ds_cloudMask) | |
else: | |
def fun(coordinate): | |
y,x = coordinate | |
i = y-y_start | |
j = x-x_start | |
map_arr[i][j] = \ | |
pheno_calc(nc_file, y, x, basedate_, dateRange, n_params, ds_cloudMask)[keys.index(parameters_select)] | |
list(map(fun, tqdm_notebook(coordinate_list, total=len(coordinate_list), | |
unit='pixel', desc = 'pheno_map'))) | |
if plot is True: | |
masked_array = np.ma.masked_where(map_arr==-9999, map_arr) | |
## visuallization | |
cmap = matplotlib.cm.jet | |
cmap.set_bad('white',1.) | |
# cm = m.colors.LinearSegmentedColormap('my_colormap', cdict, 1024) | |
normalize =matplotlib.colors.Normalize(vmin=75, vmax=150) | |
fig, ax = plt.subplots(1,1) | |
ax.imshow(masked_array, cmap=cmap,norm=normalize) | |
ax.set_xticks(np.arange(-.5, 1, (x_end-x_start)/2)) | |
ax.set_yticks(np.arange(-.5, 1, (y_end-y_start)/2)) | |
ax.set_xticklabels(np.arange(x_start, x_end, (x_end-x_start)/2)) | |
ax.set_yticklabels(np.arange(y_start, y_end, (y_end-y_start)/2)) | |
# plt.colorbar() | |
plt.show() | |
if save_plot is True: | |
path_plot = work_dir + '\\' + 'phenoMap' | |
fig.savefig(path_plot +'\\' + 'phenoMap_'+ str(x_start) + '_' + str(x_end) + '_'+ str(y_start) + '_' + str(y_end)) | |
if save_arr is True: | |
if os.path.exists(work_dir) == False: | |
os.mkdir(work_dir) | |
path_arr = work_dir + '\\' + 'phenoArr' | |
if os.path.exists(path_arr) == False: | |
os.mkdir(path_arr) | |
name_arr = 'phenoArr_'+ str(x_start) + '_' + str(x_end) + '_'+ str(y_start) + '_' + str(y_end) | |
output_arr = path_arr+ '\\' + name_arr +'.pkl' | |
num = 0 | |
while os.path.isfile(output_arr): | |
num = num + 1 | |
output_arr = path_arr + '\\' + name_arr.split('.')[0] + '_'+str(num)+'.pkl' | |
with open(output_arr,'wb') as f: | |
pickle.dump(list(map_arr),f) | |
f.close() | |
def parallel_func(x_start): | |
y_start = 0 | |
y_end = 5360 | |
length = 81 | |
x_end = x_start+length | |
basedate_ = '20170101' | |
dateRange = ['20170303','20171003'] | |
work_dir = r'D:\Kapiti' | |
nc_file = r'F:\Kapiti\PlanetScope\test\NDVI_cloudMask_Oct_netCDF_S1_3.nc' | |
n_params = 7 | |
pheno_map(y_start, y_end, x_start, x_end, basedate_, dateRange, n_params, nc_file, work_dir, ds_QA = None, ds_DOY = None, | |
save_arr = True, parameters = None, plot = None, save_plot = None) | |
# list(map(parallel_func, tqdm_notebook(x_start_list, total=len(x_start_list), | |
# unit='chunk', desc = 'pheno_map'))) | |
def multi_processing(x_start_list, n_processor): | |
""" | |
USAGE | |
----------- | |
Run pheno_cal function using multiple processors | |
ARGUMENTS: type, example, description | |
--------- | |
x_start_list: list, list(np.arange(5305,5386,81)) | |
specify the location (column number) to split the whole image into small chunks | |
n_processor: numeric | |
number of processors | |
""" | |
if __name__ == '__main__': | |
p = Pool(n_processor) | |
p.map(parallel_func,x_start_list) | |
# multi_processing(x_start_list = list(np.arange(5305,5386,81)), n_processor = 8) | |
''' | |
Examples | |
----------------------------------------------------------------------------------- | |
# Phenology estimation from NDVI time series at a sample point | |
----------------------------------------------------------------------------------- | |
import xarray as xr | |
from datetime import datetime | |
import netCDF4 as nc4 | |
from glob import glob | |
import osgeo.gdal as gdal | |
def position_transform(ref_data, nxy = None, xy = None): | |
ds = gdal.Open(ref_data) | |
a = ds.ReadAsArray() | |
b = ds.GetGeoTransform() | |
if nxy is not None: | |
x = nxy[0]*b[1]+b[0] | |
y = nxy[1]*b[5]+b[3] | |
# print([x,y]) | |
if xy is not None: | |
nx = (xy[0]-b[0])/b[1] | |
ny = (xy[1]-b[3])/b[5] | |
# print([nx,ny]) | |
return (nx,ny) | |
point = [292265, 9823457] # sample = (2840,1373) | |
ref_data = r'D:\Kapiti\NDVI_cloudMask_reshape\20170303_NDVI_cloudMask_large.tif' | |
if point[0]<10000: | |
nxy = point | |
else: | |
nxy = position_transform(ref_data,xy=point) | |
y = int(nxy[1]) | |
x = int(nxy[0]) | |
fname = r'D:\Kapiti\netCDF\NDVI_cloudMask_netCDF_'+season+'_Sentinel2_clip.nc' # Remote OPeNDAP Dataset | |
ds_NDVI = xr.open_dataset(fname) | |
# print(ds_NDVI) | |
# ds_NDVI = ds_NDVI.sel(time = np.delete(np.array(ds_NDVI['time']), 13)) | |
# print(ds_NDVI['time']) | |
season = 'S4' | |
basedate_ = '20170101' | |
if season == 'S1': | |
dateRange = ['20170301','20171001'] | |
if season == 'S2': | |
dateRange = ['20170901','20180301'] | |
if season == 'S3': | |
dateRange = ['20180201','20181001'] | |
if season == 'S4': | |
dateRange = ['20180901','20190301'] | |
n_params = 7 | |
pheno_list_, fig = pheno_calc(ds_NDVI, y, x, basedate_, dateRange, n_params, plot = True, ds_cloudMask = None, fit = None) | |
----------------------------------------------------------------------------------- | |
# Phenology estimation from NDVI time series for a piece of region | |
----------------------------------------------------------------------------------- | |
y_start = 0 | |
y_end = 66 | |
x_start = 0 | |
x_end = 64 | |
basedate_ = '20170101' | |
season = 'S1' | |
ds_cloudMask = False | |
if season == 'S1': | |
dateRange = ['20170301','20171001'] | |
if season == 'S2': | |
dateRange = ['20170901','20180301'] | |
if season == 'S3': | |
dateRange = ['20180201','20181001'] | |
if season == 'S3': | |
dateRange = ['20180901','20190301'] | |
# dateRange = ['20180201','20181001'] | |
work_dir = r'D:\Kapiti\MODIS_'+season | |
nc_file = r'D:\Kapiti\MODIS\MODIS_netCDF_all.nc' | |
n_params = 7 | |
pheno_map(y_start, y_end, x_start, x_end, basedate_, dateRange, n_params, nc_file, work_dir, ds_cloudMask, | |
save_arr = True, parameters = None, plot = None, save_plot = None) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# visualization | |
def draw_map(map_arr, work_dir, output_dir, file_name, extension, satellite, color_scheme = None, save_plot = None): | |
if os.path.exists(output_dir) == False: | |
os.mkdir(output_dir) | |
print('The output will be saved in this directory: ' + output_dir) | |
DOY_colorbar = None | |
# make a color map of fixed colors | |
param = file_name.split('_')[-1] | |
if color_scheme is 'date': | |
DOY_param = ['SOS20' , 'SOS50' , 'EOS20' , 'EOS50' , 'PS' , 'PS90'] | |
if param in DOY_param : | |
map_arr = np.array(map_arr) | |
if satellite is not 'Sentinel2_': | |
map_arr = np.where(map_arr>=0, map_arr+1, map_arr) | |
map_arr = np.where(map_arr <= 365, map_arr, map_arr-365) | |
masked_array = np.ma.masked_where((map_arr==-9999) | (map_arr == -9998), map_arr) | |
# masked_array = np.ma.masked_where(map_arr==np.nan, map_arr) | |
cmap = colors.ListedColormap(['#98000D','#CB181C', '#EF3B2C', '#FB6A4B', '#FD9272', '#FBBD98', | |
'#FF9CCE', '#F97CE4', '#FF00FE', '#CC33B2', '#AB1092', '#8A008B', | |
'#01008A','#0000CC','#1974CD','#1E90FF','#87CEFA','#B0E1FF', | |
'#C6E9BF','#45FAB5','#00EE76','#00CD00','#018B00','#006401', | |
'#864D00','#A76101','#CC840C','#FFB901','#FFD800','#FFFF87', | |
'#E8E8E8','#D2D2D2','#B7B7B7','#9F9F9F','#888888','#6F6F6F']) | |
bounds=[0,10,20,31,40,49,59, | |
69,79,90,100,110,120, | |
130,140,151,161,171,181, | |
191,201,212,222,232,243, | |
253,263,273,283,293,304, | |
314,324,334,344,354,365] | |
normalize = colors.BoundaryNorm(bounds, cmap.N) | |
DOY_colorbar = 1 | |
else: | |
masked_array = np.ma.masked_where((map_arr==-9999) | (map_arr == -9998), map_arr) | |
if satellite is 'Sentinel2_': | |
if param == 'cumNDVI50' or param == 'cumNDVI20': | |
map_arr2 = np.where(masked_array >0, map_arr * 30,np.nan) | |
else: | |
map_arr2 = np.where(masked_array >0, map_arr, np.nan) | |
else: | |
map_arr2 = np.where(masked_array >0, map_arr, np.nan) | |
masked_array = np.ma.masked_where(map_arr2==np.nan, map_arr2) | |
vmax = np.nanpercentile(map_arr2, 99.5) | |
vmin = np.nanpercentile(map_arr2, 0.5) | |
cmap = matplotlib.cm.jet | |
normalize =matplotlib.colors.Normalize(vmin=vmin, vmax=vmax) | |
if color_scheme is 'difference': | |
masked_array = np.ma.masked_where((map_arr==-9999) | (map_arr == -9998), map_arr) | |
if satellite is 'Sentinel2_': | |
if param == 'cumNDVI50' or param == 'cumNDVI20': | |
map_arr2 = np.where(masked_array >0, map_arr * 30,np.nan) | |
else: | |
map_arr2 = np.where(masked_array >0, map_arr, np.nan) | |
DOY_param = ['SOS20' , 'SOS50' , 'EOS20' , 'EOS50' , 'PS' , 'PS90'] | |
if param in DOY_param : | |
mean_DOY = map_arr2[~np.isnan(map_arr2)].mean() | |
print(mean_DOY) | |
# [112,308,444] | |
# [182,354,564] | |
map_arr2 = np.where(masked_array >0, map_arr-564, np.nan) | |
masked_array = np.ma.masked_where(map_arr2==np.nan, map_arr2) | |
# discrete | |
# cmap = colors.ListedColormap(['#a50026','#d73027', '#f46d43', '#fdae61', '#fee090', '#ffffff', | |
# '#e0f3f8', '#abd9e9', '#74add1', '#4575b4', '#313695']) | |
# bounds= [-35, -28, -21, -14, -7, -0.1, 0.1, 7, 14, 21, 28,35] | |
# normalize = colors.BoundaryNorm(bounds, cmap.N) | |
# continuous | |
# cdict2 = {'red': [(0.0, 0.0, 103/255), | |
# (0.5, 247/255, 247/255), | |
# (1.0, 5/255, 1.0)], | |
# 'green': [(0.0, 0.0, 0.0), | |
# (0.5, 247/255, 247/255), | |
# (1.0, 48/255, 1.0)], | |
# 'blue': [(0.0, 0.0, 31/255), | |
# (0.5, 247/255, 247/255), | |
# (1.0, 97/255, 1.0)]} | |
# blue_red2 = LinearSegmentedColormap('BlueRed2', cdict2) | |
# | |
# top = matplotlib.cm.get_cmap('#a50026', 128) | |
# bottom = matplotlib.cm.get_cmap('Blues', 128) | |
# newcolors = np.vstack((top(np.linspace(0, 1, 128)), | |
# bottom(np.linspace(0, 1, 128)))) | |
# blue_red2 = matplotlib.colors.ListedColormap(newcolors, name='BlueRed2') | |
# plt.register_cmap(cmap=blue_red2) | |
cmap = plt.get_cmap('RdYlBu') | |
normalize =matplotlib.colors.Normalize(vmin=-35, vmax=35) | |
else: | |
masked_array = np.ma.masked_where(map_arr2==np.nan, map_arr2) | |
vmax = np.nanpercentile(map_arr2, 99.5) | |
vmin = np.nanpercentile(map_arr2, 0.5) | |
cmap = matplotlib.cm.jet | |
normalize =matplotlib.colors.Normalize(vmin=vmin, vmax=vmax) | |
DOY_colorbar = 1 | |
else: | |
masked_array = np.ma.masked_where((map_arr==-9999) | (map_arr == -9998), map_arr) | |
if satellite is 'Sentinel2_': | |
if param == 'cumNDVI50' or param == 'cumNDVI20': | |
map_arr2 = np.where(masked_array >0, map_arr * 30, np.nan) | |
else: | |
map_arr2 = np.where(masked_array >0, map_arr, np.nan) | |
else: | |
map_arr2 = np.where(masked_array >0, map_arr, np.nan) | |
masked_array = np.ma.masked_where(map_arr2==np.nan, map_arr2) | |
# print(masked_array) | |
vmax = np.nanpercentile(map_arr2, 100) | |
vmin = np.nanpercentile(map_arr2, 0) | |
# cmap = matplotlib.cm.jet | |
cmap = plt.get_cmap('BrBG') | |
vmax = 110 | |
vmin = 40 | |
normalize =matplotlib.colors.Normalize(vmin=vmin, vmax=vmax) | |
cmap.set_bad('white',1.) | |
fig, ax = plt.subplots(1,1) | |
mesh = ax.imshow(masked_array, cmap=cmap,norm=normalize) | |
b = (283386.0, 3.0, 0.0, 9827010.0, 0.0, -3.0) | |
ax.set_xticklabels(['', 0*b[1]+b[0],1000*b[1]+b[0],2000*b[1]+b[0],3000*b[1]+b[0],4000*b[1]+b[0],5000*b[1]+b[0]]) | |
ax.set_yticklabels(['', 0*b[5]+b[3],1000*b[5]+b[3],2000*b[5]+b[3],3000*b[5]+b[3],4000*b[5]+b[3],5000*b[5]+b[3]]) | |
ax.set_axis_off() | |
if color_scheme is 'difference': | |
if mean_DOY >= 365: | |
mean_DOY_ = mean_DOY - 365 | |
else: | |
mean_DOY_ = mean_DOY | |
# ax.text(0, 0.05, 'Average of ' + param +': '+ str(int(mean_DOY_+1)), | |
# horizontalalignment='left', | |
# verticalalignment='top', | |
# transform=ax.transAxes) | |
# plt.xticks(rotation=90) | |
if DOY_colorbar == 1: | |
# axins = inset_axes(ax, | |
# width="5%", # width = 10% of parent_bbox width | |
# height="30%", # height : 50% | |
# loc='lower left', | |
# bbox_to_anchor=(1.05, 0., 1, 1), | |
# bbox_transform=ax.transAxes, | |
# borderpad=0) | |
# discrete | |
# cb = plt.colorbar(mesh, cax = axins, cmap=cmap, norm=normalize, boundaries=bounds, ticks=bounds) | |
# cb.set_ticks(bounds) | |
# if color_scheme is 'date': | |
# cb.set_ticklabels(['.','Jan.','Feb.','Mar.','Apr.','May.','Jun.','Jul.','Aug.','Sep.','Oct.','Nov.','Dec.', | |
# 'Jan.','Feb.','Mar.','Apr.','May.','Jun.','Jul.','Aug.','Sep.','Oct.','Nov.','Dec.', | |
# 'Jan.','Feb.','Mar.','Apr.','May.','Jun.','Jul.','Aug.','Sep.','Oct.','Nov.','Dec.']) | |
# tick_locator = ticker.MaxNLocator(nbins=13) | |
# cb.locator = tick_locator | |
# cb.update_ticks() | |
# if color_scheme is 'difference': | |
# cb.set_ticklabels(['-35','','','','','0','','','','','35']) | |
# continuous | |
# cb = plt.colorbar(mesh, ax=ax,ticks = np.linspace(-35, 35, 11, endpoint=True)) | |
# tick_locator = ticker.MaxNLocator(nbins=7) | |
# cb.locator = tick_locator | |
# cb.update_ticks() | |
# cb.ax.tick_params(labelsize=20) | |
a = 1 | |
else: | |
# axins = inset_axes(ax, | |
# width="5%", # width = 10% of parent_bbox width | |
# height="30%", # height : 50% | |
# loc='lower left', | |
# bbox_to_anchor=(1.05, 0., 1, 1), | |
# bbox_transform=ax.transAxes, | |
# borderpad=0) | |
# cb = plt.colorbar(mesh, ax=ax) | |
# cb = plt.colorbar(mesh, ax=ax,extend = 'min',orientation = 'horizontal') | |
# cb.ax.tick_params(labelsize=20) | |
a = 1 | |
# scalebar = ScaleBar(3) # 1 pixel = 0.2 meter | |
# plt.gca().add_artist(scalebar) | |
# plt.xticks(fontsize=5) | |
# plt.yticks(fontsize=5) | |
plt.show() | |
fig.autofmt_xdate() | |
if save_plot is True: | |
path = output_dir + '\\' + file_name + extension | |
if os.path.exists(path): | |
num = 2 | |
path = output_dir + '\\' + file_name + '_' + str(num) + extension | |
while os.path.exists(path): | |
num+=1 | |
path = output_dir + '\\' + file_name + '_' + str(num) + extension | |
fig.savefig(path, dpi=300) | |
def func(): | |
satellite = 'MODIS' | |
def main(i): | |
season = 'S3' | |
work_dir = r'D:\Kapiti\MODIS\MODIS_'+season | |
output_dir = r'D:\Kapiti\MODIS\MODIS_'+season | |
extension = '.png' | |
save_plot = True | |
color_scheme = None | |
file_name ='MODIS'+'_phenoMap_'+season+'_0_64_0_66_'+i | |
with (open(work_dir+'\\phenoArr_'+season+'\\MODIS'+'_phenoArr_'+season+'_0_64_0_66_'+i+'.pkl', "rb")) as openfile: | |
while True: | |
try: | |
map_arr = pickle.load(openfile) | |
except EOFError: | |
break | |
# print(map_arr) | |
draw_map(map_arr, work_dir, output_dir, file_name, extension, satellite,color_scheme, save_plot) | |
if satellite is 'Sentinel2_': | |
# pheno_params = ['maxNDVI'] | |
pheno_params = ['SOS20', 'SOS50', 'PS90', 'EOS20', 'EOS50', 'LGS20', 'LGS50', 'AMP1', 'AMP2', 'maxNDVI', | |
'cumNDVI20', 'cumNDVI50', 'PearsonCC1','RMSD', 'MSD', 'nImages', 'maxGap'] | |
else: | |
pheno_params = ['cumNDVI50'] | |
# pheno_params = ['SOS20', 'SOS50', 'PS90', 'PS', 'EOS20', 'EOS50', 'LGS20', 'LGS50', 'AMP1', 'AMP2', 'maxNDVI', | |
# 'cumNDVI20', 'cumNDVI50', 'PearsonCC1','PearsonCC2','RMSD', 'MSD', 'nImages', 'maxGap', 'nIterations'] | |
f = list(map(main,pheno_params)) | |
func() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment