Skip to content

Instantly share code, notes, and snippets.

@YanCheng-go
Last active April 24, 2024 06:41
Show Gist options
  • Save YanCheng-go/d4e17831f294199443d0f7682558e608 to your computer and use it in GitHub Desktop.
Save YanCheng-go/d4e17831f294199443d0f7682558e608 to your computer and use it in GitHub Desktop.
Extract phenological metrics after fitting a double hyperbolic tangent model
"""
Retrieving phenological metrics from NDVI time series after fitting a double hyperbolic tangent model
-----------------------------------------------------------------------------------------------------
Yan Cheng
chengyan2017@gmail.com
20/02/2019
Enschede, NL
------------
The important functions include:
1. data_initialize(basedate_, ndvi, dateRange) # clean data
2. upper_env_fit(X,X_,Y,p,imax,func, plot = None) # fit curve
3. phenoList(t,Y,t_, finalY, basedate_) # extract phenological metrics
The implementation of those functions for a specific objective:
# extract phenological metrics for one pixel
1. pheno_calc(nc_file, y, x, basedate_, dateRange, n_params, ds_QA = None, ds_DOY = None, fit = None, plot = None)
# extract phenological metrics for entire image
2. pheno_map(y_start, y_end, x_start, x_end, basedate_, dateRange, n_params, nc_file, work_dir, ds_QA = None, ds_DOY = None,
save_arr = None, parameters = None, plot = None, save_plot = None)
Other functions:
# for visualization
draw_map(map_arr, work_dir, output_dir, file_name, extension, satellite, color_scheme = None, save_plot = None)
"""
import pandas as pd
import numpy as np
import xarray as xr
import netCDF4
from netCDF4 import Dataset
from datetime import timedelta as timedelta
from datetime import datetime
from datetime import timedelta as td
import jdcal
from scipy.optimize import leastsq
import lmfit
from lmfit import Model
from lmfit import Parameters
from scipy.integrate import simps
from scipy.stats import pearsonr
from sklearn.metrics import mean_squared_error
from math import sqrt
from multiprocessing import Pool
import pickle
import itertools
from tqdm import tqdm_notebook
import os.path
import warnings
import matplotlib.pyplot as plt
import matplotlib.cm
import matplotlib.patches as mpatches
from matplotlib.lines import Line2D
%matplotlib inline
plt.rcParams['figure.figsize'] = [7, 5]
def data_initialize(basedate_, ndvi, dateRange):
"""
USAGE
-----------
- clean data
- initialize parameter
ARGUMENTS: type, example, description
---------
basedate_: string, "20170101"
the date to be defined as 0 in the date list
ndvi: pd.DataFrame
NDVI time series, including date and NDVI
dateRange: list, ["20170301","20171001"]
the time window of phenology analysis (single season)
"""
# data clean
# remove duplicates
ndvi = ndvi.sort_values('NDVI', ascending=False).drop_duplicates('Date')
ndvi = ndvi.sort_values('Date')
ndvi = ndvi.reset_index(drop = True)
ndvi.columns = ['Date','NDVI']
# convert julia date
d = []
for i in ndvi['Date']:
dtime = int(sum(jdcal.gcal2jd(i.year, i.month, i.day))+1)
d.append(dtime)
# print(d)
df = pd.concat([pd.DataFrame(d),ndvi['NDVI']],axis=1)
df.columns = ['DOY','NDVI']
# remove negtive NDVI
df.loc[df['NDVI']<=0, 'NDVI'] = np.nan
df = df.dropna()
df = df.reset_index(drop = True)
# set constrains
min95_5perc = 0 # minimum difference between 95th and 5th percentile of valid NDVI data to start processing
# (0.20 for Schier)
minMedian = 0.15 # minimum median NDVI value to start processing (avoid processing in sea)
# (0.20 for Schiermonnikoog, 0.15 trial for Kenya)
# check if first or last index value is strange and remove (strange = smaller than 5th percentile minus 0.1 NDVI)
# --> this can result in very weird fits
percNDVI = np.percentile(np.array(df['NDVI']), [5,50,95])
# print(percNDVI)
if df.iloc[0,1] <= percNDVI[0]-0.1:
df.iloc[0,1] = np.nan
if df.iloc[-1,1] <= percNDVI[0]-0.1:
df.iloc[-1,1] = np.nan
df = df.dropna()
percNDVI = np.percentile(np.array(df['NDVI']), [5,50,95]) # recalculate 5th, 50th and 95th percentile in NDVI array
# convert string to date
basedate = datetime.strptime(basedate_,'%Y%m%d')
date1 = datetime.strptime(dateRange[0],'%Y%m%d')
date2 = datetime.strptime(dateRange[1],'%Y%m%d')
# covert date to julia date
basedate_j = int(sum(jdcal.gcal2jd(basedate.year, basedate.month, basedate.day))+1)
date1_j = int(sum(jdcal.gcal2jd(date1.year, date1.month, date1.day))+1)
date2_j = int(sum(jdcal.gcal2jd(date2.year, date2.month, date2.day))+1)
DOY1 = date1_j
DOY2 = date2_j
# calculate the julia date for midterm
midDOY = int(DOY1 + (DOY2-DOY1)/2)
# print(midDOY)
if df.shape[0] >= 4:
if percNDVI[2]-percNDVI[0] >= min95_5perc and percNDVI[1] >= minMedian and df.iloc[3,0]<midDOY and df.iloc[-4,0]> midDOY:
t_ = range(int(DOY1),(int(DOY2)+1)) # list of date
df_ = df.loc[(df['DOY']<=DOY2) & (df['DOY']>=DOY1) , ['NDVI','DOY']] # use the time series within the time frame
# of the study
df_ = df_.sort_values('DOY')
t = df_['DOY'].tolist() # original date list
NDVIseries =df_['NDVI'].tolist() # original NDVI list
indT1 = [df['DOY'].tolist().index(x) for x in t if x <= midDOY] # all observations in the green-up phase
indT2 = [df['DOY'].tolist().index(x) for x in t if x > midDOY] # all observations in the senesence phase
P = [-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0]
if indT1 != [] and indT2 != []:
NDVIseries1st = df.iloc[indT1, 1].tolist() # NDVI list in the green-up phase
NDVIseries2nd = df.iloc[indT2, 1].tolist() # NDVI list in the senesence phase
t1st = df.iloc[indT1, 0].tolist() # date list in the green-up phase
t2st = df.iloc[indT2, 0].tolist() # date list in the senesence phase
minNDVI1st = min(NDVIseries1st) # minimum NDVI value in the green-up phase
minNDVI2nd = min(NDVIseries2nd) # minimum NDVI value in the sensence phase
indMin1 = NDVIseries1st.index(minNDVI1st)
indMin2 = NDVIseries2nd.index(minNDVI2nd)
maxNDVI = max(NDVIseries)
# print(maxNDVI)
indMax = NDVIseries.index(maxNDVI)
# print(indMax)
# initialize parameters in the hyperbolic tangent model
P[0] = minNDVI1st # minimum value in first half series, change to: min(NDVIseries) to overall minimum?
P[1] = maxNDVI - minNDVI1st # difference between observed maximum and minimum value in first half series
P[2] = int(DOY1+(t[indMax]-DOY1)/2) # midpoint between start of optimization window and timing observed maximum
P[3] = 0.02 # for now fixed value: not sure how to get this better
P[4] = maxNDVI - minNDVI2nd # difference between observed maximum and minimum value in second half series
P[5] = int(t[indMax]+(DOY2-t[indMax])/2) # midpoint between timing observed maximum and end of optimization window
P[6] = -0.02 # for now fixed value: not sure how to get this better
else:
NDVIseries, t, t_, P = [None]*4
else:
NDVIseries, t, t_, P = [None]*4
return NDVIseries, t, t_, P
# user defined model
def tanh_Double(x, a, b, c, d, e, f, g):
return a + b * (np.tanh((x - c) * d) + 1) / 2.0 + e * (np.tanh((x - f) * g) + 1) / 2.0 - e
# Upper-envelope fitting algorithm
def upper_env_fit(X,X_,Y,p,imax,func, plot = None):
"""
USAGE
-----------
Find the upper envelop of user defined model
ARGUMENTS: type, example, description
---------
X: list
list of julia date for original date
X_: list
list of julia date for entire time frame (interval: 1 day)
Y: list
list of NDVI values for original date
imax: numeric, 10
maximum number of iterations
func: function
user defined model
plot (optional): boolean
expert figures or not
"""
a,b,c,d,e,f,g = p
params = Parameters()
params.add_many(('a', a, True, a/2, None, None, None),
('b', b, True, 0, 1.25*b, None, None),
('c', c, True, None, None, None, None),
('d', d, True, 0, 0.1, None, None),
('e', e, True, 0, 1.25*e, None, None),
('f', f, True, None, None, None, None),
('g', g, True, -0.10, -0.0000001, None, None))
gmodel = Model(func)
weights = [1]*len(X)
result = gmodel.fit(Y,params,x=X,weights = weights, method = 'leastsq')
params = result.best_values
predY = result.best_fit
lp = tuple([params['a'], params['b'], params['c'], params['d'], params['e'], params['f'], params['g']])
lpredY = predY
# print(p)
lp0 = lp
if plot is True:
plt.plot(X, Y, 'o',label='original')
plt.plot(X_, func(X_, *lp), 'b--',label='fit')
plt.show()
resu = []
dif = []
for i, pY in enumerate(predY):
dif.append(abs(Y[i]-pY))
resu.append(Y[i]-pY)
mm = max(dif)
# print(mm)
if mm != 0:
weights = [1]*len(dif)
index = [i for i in range(len(resu)) if resu[i] <= 0]
if len(index) != 0:
for i in index:
weights[i] = 1-dif[i]/mm
weights0=weights
gdis = sum([abs(dif[i]*weights[i]) for i in range(len(resu))])
ormax = gdis
for it in range(imax):
# print(it)
if gdis <= ormax:
a,b,c,d,e,f,g = p
params = Parameters()
##2019-01-14 reset the limitations of a
params.add_many(('a', a, True, a/2, None, None, None),
('b', b, True, 0, 1.25*b, None, None),
('c', c, True, None, None, None, None),
('d', d, True, 0, 0.1, None, None),
('e', e, True, 0, 1.25*e, None, None),
('f', f, True, None, None, None, None),
('g', g, True, -0.10, -0.0000001, None, None))
gmodel = Model(func)
result = gmodel.fit(Y,params,x=X, weights = weights, method = 'leastsq')
params = result.best_values
predY = result.best_fit
lp = tuple([params['a'], params['b'], params['c'], params['d'], params['e'], params['f'], params['g']])
lpredY = predY
resu = []
dif = []
for i, pY in enumerate(predY):
resu.append(Y[i]-pY)
dif.append(abs(Y[i]-pY))
mm = max(dif)
weights = [1]*len(dif)
index = [i for i in range(len(resu)) if resu[i] <= 0]
if len(index) != 0:
for i in index:
weights[i] = 1-dif[i]/mm
ormax = gdis
gdis = sum([abs(resu[i]*weights0[i]) for i in range(len(resu))])
if plot is True:
plt.plot(X, Y, 'o',label='original')
plt.plot(X_, func(X_, *lp), 'b--',label='fit')
plt.show()
n_it = it+2
# if the last fit is not correct, then use the second last fit
diffff = gdis-ormax
if gdis>ormax and diffff>0.5:
lp = lp0
# print(lp)
n_it = n_it-1
# print(n_it)
break
else:
lp0 = lp
# print(lp0)
if plot is True:
plt.plot(X, Y, 'o',label='original')
plt.plot(X_, func(X_, *lp), 'b--',label='fit')
plt.show()
# print('The number of iterations is: ' + str(n_it+1))
# print('\nThe optimal parameters are: ' + str(lp))
# print('\nThe fit report is: \n' + result.fit_report())
Y_ = func(X_,*lp)
# print(lp)
return lp,Y_, n_it
def phenoList(t,Y,t_, finalY, basedate_):
"""
USAGE
-----------
Extract phenological metrics from fitted curves
ARGUMENTS: type, example, description
---------
t: list
list of julia date for original date
t_: list
list of julia date for the entire time frame (interval: 1 day)
Y: list
list of NDVI values for original date
finalY: list
Xlist of predicted NDVI values for the entire time frame (interval: 1 day)
basedate_: string, '20170101'
the date to be defined as 0 in the date list
"""
basedate = datetime.strptime(basedate_,'%Y%m%d')
basedate_j = int(sum(jdcal.gcal2jd(basedate.year, basedate.month, basedate.day))+1)
t_ = np.array([i-basedate_j for i in t_])
DOY_start = int(t_[0])
DOY_end = int(t_[-1])
NDVI_start = finalY[0]
NDVI_end = finalY[-1]
df = pd.DataFrame([t_,finalY]).T
df.columns = ['DOY', 'NDVI']
df_orginal = pd.DataFrame([np.array(t),np.array(Y)]).T
df_orginal.columns = ['DOY', 'NDVI']
df_orginal['Gap'] = df_orginal.DOY.sub(df_orginal.DOY.shift())
df.set_index('DOY', inplace=True)
# print(df)
peak_DOY = int(df.NDVI.idxmax()) # DOY of the peak season
NDVI_upward = df.NDVI[DOY_start:peak_DOY]
NDVI_downward = df.NDVI[peak_DOY:DOY_end+1]
maxNDVI = df.NDVI.max() # maximum NDVI
AMP1 = maxNDVI - NDVI_upward.min()
AMP2 = maxNDVI - NDVI_downward.min()
def diff(maxNDVI, minNDVI, percentage, direction):
threshold = (maxNDVI - minNDVI) * percentage + minNDVI
diff = [i - threshold for i in direction]
return diff
diff_upward_20 = diff(maxNDVI, NDVI_upward.min(), 0.2, NDVI_upward)
diff_downward_20 = diff(maxNDVI, NDVI_downward.min(), 0.2, NDVI_downward)
SOS20 = t_[sum(i < 0 for i in diff_upward_20)]
EOS20 = sum(i > 0 for i in diff_downward_20)+peak_DOY-1
diff_upward_50 = diff(maxNDVI, NDVI_upward.min(), 0.5, NDVI_upward)
diff_downward_50 = diff(maxNDVI, NDVI_downward.min(), 0.5, NDVI_downward)
SOS50 = t_[sum(i < 0 for i in diff_upward_50)]
EOS50 = sum(i > 0 for i in diff_downward_50)+peak_DOY-1
diff_upward_90 = diff(maxNDVI, NDVI_start, 0.9, NDVI_upward)
PS90 = t_[sum(i < 0 for i in diff_upward_90)]
nImages = len(df_orginal)
maxGap = int(df_orginal.Gap.max())
if EOS20 > SOS20:
LGS20 = EOS20-SOS20
else:
LGS20 = -9998
if EOS50 > SOS50:
LGS50 = EOS50-SOS50
else:
LGS50 = -9998
warnings.filterwarnings('error')
try:
cumNDVI20 = simps(df.NDVI[SOS20:EOS20], dx=1)
except:
cumNDVI20 = -9998
try:
cumNDVI50 = simps(df.NDVI[SOS50:EOS50], dx=1)
except:
cumNDVI50 = -9998
t = np.array([i-basedate_j for i in t])
# print(t)
try:
PearsonCC = pearsonr(df.NDVI[t], Y)
except:
PearsonCC = (-9998,-9998)
PearsonCC1 = PearsonCC[0]
PearsonCC2 = PearsonCC[1]
try:
RMSD = sqrt(mean_squared_error(Y, df.NDVI[t]))
except:
RMSD = -9998
try:
MSD = np.mean(np.array(pd.DataFrame(Y)) - np.array(pd.DataFrame(df.NDVI[t]))) ## updated
# print(df.NDVI[t])
except:
MSD = -9998
phenoList_ = [SOS20, SOS50, PS90, peak_DOY, EOS20, EOS50, LGS20, LGS50, AMP1, AMP2, maxNDVI, cumNDVI20, cumNDVI50,
PearsonCC1,PearsonCC2, RMSD, MSD,nImages, maxGap]
# print(phenoList_)
return phenoList_
def pheno_calc(nc_file, y, x, basedate_, dateRange, n_params, ds_cloudMask, fit = None, plot = None):
"""
USAGE
-----------
Retrieve phenological metrics for individual pixel in a image series
ARGUMENTS: type, example, description
---------
nc_file: string, r'F:\Kapiti\PlanetScope\test\NDVI_cloudMask_Oct_netCDF_S1_3.nc'
file path of netCDF data
y: numeric, 0
row number in the raster, >=0
x: numeric, 0
column number in the raster, >=0
basedate_: string, '20170101'
the date to be defined as 0 in the date list
dateRange: list, ["20170301","20171001"]
the time window of phenology analysis (single season)
n_params: numeric, 7
number of parameters in user defined model
ds_cloudMask (optional): boolean
use isolated quality layer and DOY layer or not (for MODIS)
fit (optional): string, 'upper_env_fit'
specific the curve fitting method
plot (optional): boolean
export figures or not
"""
# specifiy the curve fitting algorithm
if fit is None:
fit = 'upper_env_fit' # fit upper envelop of the user defined model
ds_NDVI = xr.open_dataset(nc_file)
# extract NDVI time series for a pixel (column number x, row number y)
dsloc = ds_NDVI.sel(x=x, y=y, method='nearest')
dsloc = dsloc.where(dsloc > 0) # remove negative values
NDVI = np.array(dsloc['NDVI']) # convert to np.array
Date_ = np.array(dsloc['time'])
# extract quality time series for a pixel (column number x, row number y)
if ds_cloudMask is True:
dsloc2 = ds_NDVI.sel(x=x, y=y, method='nearest')
# for MODIS
class_ = np.array(dsloc2['QA'])
dsloc2_bad = list(np.where((class_ == 3002)|(class_ == 3003))[0])
# set bad observations as NAN
def setNan(i):
NDVI[i]=np.nan
list(map(setNan,dsloc2_bad))
# remove bad observations from NDVI time series
df_nan = pd.DataFrame(NDVI).dropna()
if not df_nan.empty:
# for MODIS
# convert MODIS DOY layer to normal format of date
if ds_cloudMask is True:
Date_ = []
for idx, item in enumerate(np.array(dsloc['DOY'])):
# print(item)
if idx<=37:
Date_.append(datetime(2017, 1, 1) + timedelta(item - 1))
else:
Date_.append(datetime(2018, 1, 1) + timedelta(item - 1))
Date_ = np.array(Date_)
# print(Date_)
# NDVI time series
ndvi = pd.concat([pd.DataFrame(Date_), pd.DataFrame(NDVI)], axis = 1)
ndvi.columns = ['Date','NDVI']
# data initialization
Y, t, t_, P = data_initialize(basedate_, ndvi, dateRange)
# check if data is validated
if Y is None:
keys = ['SOS20', 'SOS50', 'PS90', 'PS', 'EOS20', 'EOS50', 'LGS20', 'LGS50', 'AMP1', 'AMP2', 'maxNDVI', 'cumNDVI20',
'cumNDVI50', 'PearsonCC1', 'PearsonCC2','RMSD', 'MSD', 'nImages', 'maxGap', 'nIterations']
# phenoList_ = {key: -9999 for key in keys}
phenoList_ = [-9998]*len(keys)
if plot is True:
print(phenoList_)
else:
n_obs = len(Y)
# curve fitting can be executed only when the number of observaitons are more than the number of parameters
if n_obs > n_params:
n_it = None # initialize the number of iterations
# fit upper envelop of the double hyperbolic tangent model
if fit == 'upper_env_fit':
lp,Y_,n_it = upper_env_fit(X=t,X_=t_,Y=Y,p=P,imax=10,func=tanh_Double)
# if fit == 'non_weighted_fit':
# lp,Y_ = non_weighted_fit(X=t,X_=t_,Y=Y,p=P,func=tanh_Double)
# if fit == 'upper_env_fit2':
# lp,Y_ = upper_env_fit2(X=t,X_=t_,Y=Y,p=P,imax=10,func=tanh_Double)
# extract phenological metrics from fitted curves
phenoList_ = phenoList(t,Y,t_, Y_, basedate_)
if n_it is not None:
phenoList_.append(n_it)
if plot is True:
print(phenoList_)
color_list = ['#1b9e77', '#d95f02','#1b9e77']
color = color_list[2]
# fig = plt.figure()
fig,ax1 = plt.subplots(figsize = (12,8))
gd = [julian.from_jd(i, fmt='jd') for i in t]
# print(gd)
gd_ = [julian.from_jd(i, fmt='jd') for i in t_]
plt.plot(gd,Y,color = color , marker = 'o', linewidth = 0)
plt.plot(gd_,Y_, color = color,linewidth = 4)
x1 = datetime(2017, 1, 1,12,0,0) + timedelta(int(phenoList_[1]))
x2 = datetime(2017, 1, 1,12,0,0) + timedelta(int(phenoList_[5]))
plt.plot([x1, x1],[0,0.1], color = color, linewidth = 4)
plt.plot([x2, x2],[0,0.1], color = color, linewidth = 4)
date_fill = [x1 + timedelta(days=x) for x in range(0, (x2-x1).days+1)]
df = pd.DataFrame({'Date':gd_, 'NDVI': Y_})
df_fill = df.loc[df['Date'].isin(date_fill)]
plt.fill_between(date_fill,np.array(df_fill['NDVI']), alpha = 0.2, color = '#1b9e77')
# plt.text(x, y, s, fontsize=12)
plt.ylim(0)
plt.xticks(fontsize = 15, rotation = 0)
plt.yticks(fontsize = 15)
# ax1.set_ylim(0.3,0.6)
# plt.setp(ax1.get_yticklabels()[-1], visible=False)
# plt.setp(ax1.get_yticklabels()[0], visible=False)
ax1.tick_params(axis="x",direction="in", which = 'major', length = 10)
ax1.tick_params(axis="x",direction="in", which = 'minor', length = 5)
ax1.tick_params(axis="y",direction="in", which = 'major', length = 10)
# ax1.xaxis.set_tick_params(labelsize = 20, rotation = 0, which = 'both')
# ax1.yaxis.set_tick_params(labelsize = 20)
# ax1.set_ylabel('Camera-derived GCC', fontsize = 20)
legend_elements = [Line2D([0], [0], color=color_list[0], lw=4, label='Location A'),
Line2D([0], [0], color=color_list[1], lw=4, label='Location A (modified)'),
Line2D([0], [0], color=color_list[2], lw=4, label='Location B')]
# red_patch = mpatches.Patch(edgecolor=color[0],fill = False, linewidth = 2,label='Location A')
# blue_patch = mpatches.Patch(edgecolor=color[1], fill = False,linewidth = 2,label='Location A (modified)')
# yellow_patch = mpatches.Patch(edgecolor=color[2], fill = False,linewidth = 2, label='Location B')
# L1 = plt.plot([0],[0],color = color[0],label = 'Location A')
# L2 = plt.plot([0],[0],color = color[1],label = 'Location A (modified)')
# L3 = plt.plot([0],[0],color = color[2],label = 'Location B')
# plt.legend(handles=[L1,L2,L3], fontsize = 20)
# plt.legend()
# plt.legend(handles=[red_patch, blue_patch,yellow_patch], fontsize = 15)
# plt.legend(handles=legend_elements, fontsize = 15,loc = [1.5,0.5])
plt.ylabel('NDVI', fontsize = 15)
plt.xlabel('Date (Year-Month)', fontsize = 15)
plt.show()
else:
keys = ['SOS20', 'SOS50', 'PS90', 'PS', 'EOS20', 'EOS50', 'LGS20', 'LGS50', 'AMP1', 'AMP2', 'maxNDVI',
'cumNDVI20', 'cumNDVI50', 'PearsonCC1', 'PearsonCC2','RMSD', 'MSD', 'nImages', 'maxGap', 'nIterations']
# phenoList_ = {key: -9999 for key in keys}
phenoList_ = [-9998]*len(keys)
else:
keys = ['SOS20', 'SOS50', 'PS90', 'PS', 'EOS20', 'EOS50', 'LGS20', 'LGS50', 'AMP1', 'AMP2', 'maxNDVI', 'cumNDVI20',
'cumNDVI50', 'PearsonCC1', 'PearsonCC2','RMSD', 'MSD', 'nImages', 'maxGap', 'nIterations']
# phenoList_ = {key: -9999 for key in keys}
phenoList_ = [-9999]*len(keys)
return phenoList_
def pheno_map(y_start, y_end, x_start, x_end, basedate_, dateRange, n_params, nc_file, work_dir, ds_cloudMask,
save_arr = None, parameters = None, plot = None, save_plot = None):
"""
USAGE
-----------
Retrieve phenological metrics for specific area in the image
ARGUMENTS: type, example, description
---------
y_start, y_end, x_start, x_end: numeric, 0, 100, 0, 100
specify an area in the image
basedate_: string, '20170101'
the date to be defined as 0 in the date list
dateRange: list, ["20170301","20171001"]
the time window of phenology analysis (single season)
n_params: numeric, 7
the number of parameters in the user defined model
nc_file: string, r'D:\Kapiti\netCDF\NDVI_cloudMask_netCDF_S1.nc'
file path of netCDF data
work_dir: string, r'D:\Kapiti'
file path of work directory
ds_cloudMask (optional): boolean
use isolated quality layer and DOY layer or not (for MODIS)
fit (optional): string, 'upper_env_fit'
specific the curve fitting method
save_arr (optional): boolean
save phenology map (array) or not
parameters (optional): string, 'SOS50'
specify a phenological metric
plot (optional): boolean
export figures or not
save_plot (optional): boolean
save phenology map (figure) or not
"""
if parameters is None:
parameters_select = parameters
y_range = range(y_start,y_end)
n_y = y_end-y_start
x_range = range(x_start,x_end)
n_x = x_end-x_start
keys = ['SOS20', 'SOS50', 'PS90', 'PS', 'EOS20', 'EOS50', 'LGS20', 'LGS50', 'AMP1', 'AMP2', 'maxNDVI', 'cumNDVI20',
'cumNDVI50', 'PearsonCC1', 'PearsonCC2','RMSD', 'MSD', 'nImages', 'maxGap', 'nIterations']
var = len(keys)
# ds_NDVI = xr.open_dataset(nc_file)
coordinate_list = list(itertools.product(y_range, x_range))
map_arr = np.zeros((n_y,n_x,var))
if parameters is None:
def fun(coordinate):
y,x = coordinate
i = y-y_start
j = x-x_start
# map_arr[i][j][:] = list(pheno_calc(ds_NDVI, y, x, basedate_, dateRange).values())
map_arr[i][j] = pheno_calc(nc_file, y, x, basedate_, dateRange, n_params, ds_cloudMask)
else:
def fun(coordinate):
y,x = coordinate
i = y-y_start
j = x-x_start
map_arr[i][j] = \
pheno_calc(nc_file, y, x, basedate_, dateRange, n_params, ds_cloudMask)[keys.index(parameters_select)]
list(map(fun, tqdm_notebook(coordinate_list, total=len(coordinate_list),
unit='pixel', desc = 'pheno_map')))
if plot is True:
masked_array = np.ma.masked_where(map_arr==-9999, map_arr)
## visuallization
cmap = matplotlib.cm.jet
cmap.set_bad('white',1.)
# cm = m.colors.LinearSegmentedColormap('my_colormap', cdict, 1024)
normalize =matplotlib.colors.Normalize(vmin=75, vmax=150)
fig, ax = plt.subplots(1,1)
ax.imshow(masked_array, cmap=cmap,norm=normalize)
ax.set_xticks(np.arange(-.5, 1, (x_end-x_start)/2))
ax.set_yticks(np.arange(-.5, 1, (y_end-y_start)/2))
ax.set_xticklabels(np.arange(x_start, x_end, (x_end-x_start)/2))
ax.set_yticklabels(np.arange(y_start, y_end, (y_end-y_start)/2))
# plt.colorbar()
plt.show()
if save_plot is True:
path_plot = work_dir + '\\' + 'phenoMap'
fig.savefig(path_plot +'\\' + 'phenoMap_'+ str(x_start) + '_' + str(x_end) + '_'+ str(y_start) + '_' + str(y_end))
if save_arr is True:
if os.path.exists(work_dir) == False:
os.mkdir(work_dir)
path_arr = work_dir + '\\' + 'phenoArr'
if os.path.exists(path_arr) == False:
os.mkdir(path_arr)
name_arr = 'phenoArr_'+ str(x_start) + '_' + str(x_end) + '_'+ str(y_start) + '_' + str(y_end)
output_arr = path_arr+ '\\' + name_arr +'.pkl'
num = 0
while os.path.isfile(output_arr):
num = num + 1
output_arr = path_arr + '\\' + name_arr.split('.')[0] + '_'+str(num)+'.pkl'
with open(output_arr,'wb') as f:
pickle.dump(list(map_arr),f)
f.close()
def parallel_func(x_start):
y_start = 0
y_end = 5360
length = 81
x_end = x_start+length
basedate_ = '20170101'
dateRange = ['20170303','20171003']
work_dir = r'D:\Kapiti'
nc_file = r'F:\Kapiti\PlanetScope\test\NDVI_cloudMask_Oct_netCDF_S1_3.nc'
n_params = 7
pheno_map(y_start, y_end, x_start, x_end, basedate_, dateRange, n_params, nc_file, work_dir, ds_QA = None, ds_DOY = None,
save_arr = True, parameters = None, plot = None, save_plot = None)
# list(map(parallel_func, tqdm_notebook(x_start_list, total=len(x_start_list),
# unit='chunk', desc = 'pheno_map')))
def multi_processing(x_start_list, n_processor):
"""
USAGE
-----------
Run pheno_cal function using multiple processors
ARGUMENTS: type, example, description
---------
x_start_list: list, list(np.arange(5305,5386,81))
specify the location (column number) to split the whole image into small chunks
n_processor: numeric
number of processors
"""
if __name__ == '__main__':
p = Pool(n_processor)
p.map(parallel_func,x_start_list)
# multi_processing(x_start_list = list(np.arange(5305,5386,81)), n_processor = 8)
'''
Examples
-----------------------------------------------------------------------------------
# Phenology estimation from NDVI time series at a sample point
-----------------------------------------------------------------------------------
import xarray as xr
from datetime import datetime
import netCDF4 as nc4
from glob import glob
import osgeo.gdal as gdal
def position_transform(ref_data, nxy = None, xy = None):
ds = gdal.Open(ref_data)
a = ds.ReadAsArray()
b = ds.GetGeoTransform()
if nxy is not None:
x = nxy[0]*b[1]+b[0]
y = nxy[1]*b[5]+b[3]
# print([x,y])
if xy is not None:
nx = (xy[0]-b[0])/b[1]
ny = (xy[1]-b[3])/b[5]
# print([nx,ny])
return (nx,ny)
point = [292265, 9823457] # sample = (2840,1373)
ref_data = r'D:\Kapiti\NDVI_cloudMask_reshape\20170303_NDVI_cloudMask_large.tif'
if point[0]<10000:
nxy = point
else:
nxy = position_transform(ref_data,xy=point)
y = int(nxy[1])
x = int(nxy[0])
fname = r'D:\Kapiti\netCDF\NDVI_cloudMask_netCDF_'+season+'_Sentinel2_clip.nc' # Remote OPeNDAP Dataset
ds_NDVI = xr.open_dataset(fname)
# print(ds_NDVI)
# ds_NDVI = ds_NDVI.sel(time = np.delete(np.array(ds_NDVI['time']), 13))
# print(ds_NDVI['time'])
season = 'S4'
basedate_ = '20170101'
if season == 'S1':
dateRange = ['20170301','20171001']
if season == 'S2':
dateRange = ['20170901','20180301']
if season == 'S3':
dateRange = ['20180201','20181001']
if season == 'S4':
dateRange = ['20180901','20190301']
n_params = 7
pheno_list_, fig = pheno_calc(ds_NDVI, y, x, basedate_, dateRange, n_params, plot = True, ds_cloudMask = None, fit = None)
-----------------------------------------------------------------------------------
# Phenology estimation from NDVI time series for a piece of region
-----------------------------------------------------------------------------------
y_start = 0
y_end = 66
x_start = 0
x_end = 64
basedate_ = '20170101'
season = 'S1'
ds_cloudMask = False
if season == 'S1':
dateRange = ['20170301','20171001']
if season == 'S2':
dateRange = ['20170901','20180301']
if season == 'S3':
dateRange = ['20180201','20181001']
if season == 'S3':
dateRange = ['20180901','20190301']
# dateRange = ['20180201','20181001']
work_dir = r'D:\Kapiti\MODIS_'+season
nc_file = r'D:\Kapiti\MODIS\MODIS_netCDF_all.nc'
n_params = 7
pheno_map(y_start, y_end, x_start, x_end, basedate_, dateRange, n_params, nc_file, work_dir, ds_cloudMask,
save_arr = True, parameters = None, plot = None, save_plot = None)
# visualization
def draw_map(map_arr, work_dir, output_dir, file_name, extension, satellite, color_scheme = None, save_plot = None):
if os.path.exists(output_dir) == False:
os.mkdir(output_dir)
print('The output will be saved in this directory: ' + output_dir)
DOY_colorbar = None
# make a color map of fixed colors
param = file_name.split('_')[-1]
if color_scheme is 'date':
DOY_param = ['SOS20' , 'SOS50' , 'EOS20' , 'EOS50' , 'PS' , 'PS90']
if param in DOY_param :
map_arr = np.array(map_arr)
if satellite is not 'Sentinel2_':
map_arr = np.where(map_arr>=0, map_arr+1, map_arr)
map_arr = np.where(map_arr <= 365, map_arr, map_arr-365)
masked_array = np.ma.masked_where((map_arr==-9999) | (map_arr == -9998), map_arr)
# masked_array = np.ma.masked_where(map_arr==np.nan, map_arr)
cmap = colors.ListedColormap(['#98000D','#CB181C', '#EF3B2C', '#FB6A4B', '#FD9272', '#FBBD98',
'#FF9CCE', '#F97CE4', '#FF00FE', '#CC33B2', '#AB1092', '#8A008B',
'#01008A','#0000CC','#1974CD','#1E90FF','#87CEFA','#B0E1FF',
'#C6E9BF','#45FAB5','#00EE76','#00CD00','#018B00','#006401',
'#864D00','#A76101','#CC840C','#FFB901','#FFD800','#FFFF87',
'#E8E8E8','#D2D2D2','#B7B7B7','#9F9F9F','#888888','#6F6F6F'])
bounds=[0,10,20,31,40,49,59,
69,79,90,100,110,120,
130,140,151,161,171,181,
191,201,212,222,232,243,
253,263,273,283,293,304,
314,324,334,344,354,365]
normalize = colors.BoundaryNorm(bounds, cmap.N)
DOY_colorbar = 1
else:
masked_array = np.ma.masked_where((map_arr==-9999) | (map_arr == -9998), map_arr)
if satellite is 'Sentinel2_':
if param == 'cumNDVI50' or param == 'cumNDVI20':
map_arr2 = np.where(masked_array >0, map_arr * 30,np.nan)
else:
map_arr2 = np.where(masked_array >0, map_arr, np.nan)
else:
map_arr2 = np.where(masked_array >0, map_arr, np.nan)
masked_array = np.ma.masked_where(map_arr2==np.nan, map_arr2)
vmax = np.nanpercentile(map_arr2, 99.5)
vmin = np.nanpercentile(map_arr2, 0.5)
cmap = matplotlib.cm.jet
normalize =matplotlib.colors.Normalize(vmin=vmin, vmax=vmax)
if color_scheme is 'difference':
masked_array = np.ma.masked_where((map_arr==-9999) | (map_arr == -9998), map_arr)
if satellite is 'Sentinel2_':
if param == 'cumNDVI50' or param == 'cumNDVI20':
map_arr2 = np.where(masked_array >0, map_arr * 30,np.nan)
else:
map_arr2 = np.where(masked_array >0, map_arr, np.nan)
DOY_param = ['SOS20' , 'SOS50' , 'EOS20' , 'EOS50' , 'PS' , 'PS90']
if param in DOY_param :
mean_DOY = map_arr2[~np.isnan(map_arr2)].mean()
print(mean_DOY)
# [112,308,444]
# [182,354,564]
map_arr2 = np.where(masked_array >0, map_arr-564, np.nan)
masked_array = np.ma.masked_where(map_arr2==np.nan, map_arr2)
# discrete
# cmap = colors.ListedColormap(['#a50026','#d73027', '#f46d43', '#fdae61', '#fee090', '#ffffff',
# '#e0f3f8', '#abd9e9', '#74add1', '#4575b4', '#313695'])
# bounds= [-35, -28, -21, -14, -7, -0.1, 0.1, 7, 14, 21, 28,35]
# normalize = colors.BoundaryNorm(bounds, cmap.N)
# continuous
# cdict2 = {'red': [(0.0, 0.0, 103/255),
# (0.5, 247/255, 247/255),
# (1.0, 5/255, 1.0)],
# 'green': [(0.0, 0.0, 0.0),
# (0.5, 247/255, 247/255),
# (1.0, 48/255, 1.0)],
# 'blue': [(0.0, 0.0, 31/255),
# (0.5, 247/255, 247/255),
# (1.0, 97/255, 1.0)]}
# blue_red2 = LinearSegmentedColormap('BlueRed2', cdict2)
#
# top = matplotlib.cm.get_cmap('#a50026', 128)
# bottom = matplotlib.cm.get_cmap('Blues', 128)
# newcolors = np.vstack((top(np.linspace(0, 1, 128)),
# bottom(np.linspace(0, 1, 128))))
# blue_red2 = matplotlib.colors.ListedColormap(newcolors, name='BlueRed2')
# plt.register_cmap(cmap=blue_red2)
cmap = plt.get_cmap('RdYlBu')
normalize =matplotlib.colors.Normalize(vmin=-35, vmax=35)
else:
masked_array = np.ma.masked_where(map_arr2==np.nan, map_arr2)
vmax = np.nanpercentile(map_arr2, 99.5)
vmin = np.nanpercentile(map_arr2, 0.5)
cmap = matplotlib.cm.jet
normalize =matplotlib.colors.Normalize(vmin=vmin, vmax=vmax)
DOY_colorbar = 1
else:
masked_array = np.ma.masked_where((map_arr==-9999) | (map_arr == -9998), map_arr)
if satellite is 'Sentinel2_':
if param == 'cumNDVI50' or param == 'cumNDVI20':
map_arr2 = np.where(masked_array >0, map_arr * 30, np.nan)
else:
map_arr2 = np.where(masked_array >0, map_arr, np.nan)
else:
map_arr2 = np.where(masked_array >0, map_arr, np.nan)
masked_array = np.ma.masked_where(map_arr2==np.nan, map_arr2)
# print(masked_array)
vmax = np.nanpercentile(map_arr2, 100)
vmin = np.nanpercentile(map_arr2, 0)
# cmap = matplotlib.cm.jet
cmap = plt.get_cmap('BrBG')
vmax = 110
vmin = 40
normalize =matplotlib.colors.Normalize(vmin=vmin, vmax=vmax)
cmap.set_bad('white',1.)
fig, ax = plt.subplots(1,1)
mesh = ax.imshow(masked_array, cmap=cmap,norm=normalize)
b = (283386.0, 3.0, 0.0, 9827010.0, 0.0, -3.0)
ax.set_xticklabels(['', 0*b[1]+b[0],1000*b[1]+b[0],2000*b[1]+b[0],3000*b[1]+b[0],4000*b[1]+b[0],5000*b[1]+b[0]])
ax.set_yticklabels(['', 0*b[5]+b[3],1000*b[5]+b[3],2000*b[5]+b[3],3000*b[5]+b[3],4000*b[5]+b[3],5000*b[5]+b[3]])
ax.set_axis_off()
if color_scheme is 'difference':
if mean_DOY >= 365:
mean_DOY_ = mean_DOY - 365
else:
mean_DOY_ = mean_DOY
# ax.text(0, 0.05, 'Average of ' + param +': '+ str(int(mean_DOY_+1)),
# horizontalalignment='left',
# verticalalignment='top',
# transform=ax.transAxes)
# plt.xticks(rotation=90)
if DOY_colorbar == 1:
# axins = inset_axes(ax,
# width="5%", # width = 10% of parent_bbox width
# height="30%", # height : 50%
# loc='lower left',
# bbox_to_anchor=(1.05, 0., 1, 1),
# bbox_transform=ax.transAxes,
# borderpad=0)
# discrete
# cb = plt.colorbar(mesh, cax = axins, cmap=cmap, norm=normalize, boundaries=bounds, ticks=bounds)
# cb.set_ticks(bounds)
# if color_scheme is 'date':
# cb.set_ticklabels(['.','Jan.','Feb.','Mar.','Apr.','May.','Jun.','Jul.','Aug.','Sep.','Oct.','Nov.','Dec.',
# 'Jan.','Feb.','Mar.','Apr.','May.','Jun.','Jul.','Aug.','Sep.','Oct.','Nov.','Dec.',
# 'Jan.','Feb.','Mar.','Apr.','May.','Jun.','Jul.','Aug.','Sep.','Oct.','Nov.','Dec.'])
# tick_locator = ticker.MaxNLocator(nbins=13)
# cb.locator = tick_locator
# cb.update_ticks()
# if color_scheme is 'difference':
# cb.set_ticklabels(['-35','','','','','0','','','','','35'])
# continuous
# cb = plt.colorbar(mesh, ax=ax,ticks = np.linspace(-35, 35, 11, endpoint=True))
# tick_locator = ticker.MaxNLocator(nbins=7)
# cb.locator = tick_locator
# cb.update_ticks()
# cb.ax.tick_params(labelsize=20)
a = 1
else:
# axins = inset_axes(ax,
# width="5%", # width = 10% of parent_bbox width
# height="30%", # height : 50%
# loc='lower left',
# bbox_to_anchor=(1.05, 0., 1, 1),
# bbox_transform=ax.transAxes,
# borderpad=0)
# cb = plt.colorbar(mesh, ax=ax)
# cb = plt.colorbar(mesh, ax=ax,extend = 'min',orientation = 'horizontal')
# cb.ax.tick_params(labelsize=20)
a = 1
# scalebar = ScaleBar(3) # 1 pixel = 0.2 meter
# plt.gca().add_artist(scalebar)
# plt.xticks(fontsize=5)
# plt.yticks(fontsize=5)
plt.show()
fig.autofmt_xdate()
if save_plot is True:
path = output_dir + '\\' + file_name + extension
if os.path.exists(path):
num = 2
path = output_dir + '\\' + file_name + '_' + str(num) + extension
while os.path.exists(path):
num+=1
path = output_dir + '\\' + file_name + '_' + str(num) + extension
fig.savefig(path, dpi=300)
def func():
satellite = 'MODIS'
def main(i):
season = 'S3'
work_dir = r'D:\Kapiti\MODIS\MODIS_'+season
output_dir = r'D:\Kapiti\MODIS\MODIS_'+season
extension = '.png'
save_plot = True
color_scheme = None
file_name ='MODIS'+'_phenoMap_'+season+'_0_64_0_66_'+i
with (open(work_dir+'\\phenoArr_'+season+'\\MODIS'+'_phenoArr_'+season+'_0_64_0_66_'+i+'.pkl', "rb")) as openfile:
while True:
try:
map_arr = pickle.load(openfile)
except EOFError:
break
# print(map_arr)
draw_map(map_arr, work_dir, output_dir, file_name, extension, satellite,color_scheme, save_plot)
if satellite is 'Sentinel2_':
# pheno_params = ['maxNDVI']
pheno_params = ['SOS20', 'SOS50', 'PS90', 'EOS20', 'EOS50', 'LGS20', 'LGS50', 'AMP1', 'AMP2', 'maxNDVI',
'cumNDVI20', 'cumNDVI50', 'PearsonCC1','RMSD', 'MSD', 'nImages', 'maxGap']
else:
pheno_params = ['cumNDVI50']
# pheno_params = ['SOS20', 'SOS50', 'PS90', 'PS', 'EOS20', 'EOS50', 'LGS20', 'LGS50', 'AMP1', 'AMP2', 'maxNDVI',
# 'cumNDVI20', 'cumNDVI50', 'PearsonCC1','PearsonCC2','RMSD', 'MSD', 'nImages', 'maxGap', 'nIterations']
f = list(map(main,pheno_params))
func()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment