GEE
# %%
import GEE as gee
from GEElt.collections import MODISVImerge, reduceAnomaly, getTime, sequentialMap
from GEElt.ts import ts_extract
from GEElt.resources import *
from GEElt.colorbrewer import *
from geetools import bitreader
import numpy as np
import pandas as pd
import geopandas as gpd
import sqlite3 as db
import matplotlib.pyplot as plt
plt.style.use('ggplot')
from pandas.plotting import scatter_matrix
import datetime as dt
from dateutil.relativedelta import *
import ee, eemont, geemap, wxee
ee.Initialize()
#%% Load Noa's data and create a FeatureCollection
df = pd.read_excel('/Users/seb/Documents/WORK/Projects/Taal/Noa/Survey_data.xlsx',header=1).set_index('id')
# .drop_duplicates(subset=['x', 'y'])
gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.x, df.y), crs=32651).to_crs(4326)
fc = geemap.geopandas_to_ee(gdf[['geometry']])
# %% Create a MODIS collection and export it
MODIS = MODISVImerge(bands=['EVI', 'NDVI', 'DetailedQA', 'SummaryQA'], tstart='2015-01-01', tend='2021-12-31')
ts = MODIS.getTimeSeriesByRegions(reducer=ee.Reducer.mean(),
                                  collection=fc,
                                  bands=['EVI'],
                                  scale=250)
ee.batch.Export.table.toDrive(collection=ts, description='MODIS_Taal', fileFormat='csv').start()
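# The export runs asynchronously on the GEE servers; a sketch for polling
# progress (keep a handle on the task rather than starting it anonymously):
# task = ee.batch.Export.table.toDrive(collection=ts, description='MODIS_Taal', fileFormat='csv')
# task.start()
# task.status()  # dict with a 'state' key: READY / RUNNING / COMPLETED / FAILED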
#%% Compute server-side 1-, 2- and 3-month median composites for comparison
MODIS1, _, _, _ = reduceAnomaly(MODIS, reducer='median', nUnit=1, tstart='2015-01-01', tend='2021-12-31')
MODIS2, _, _, _ = reduceAnomaly(MODIS, reducer='median', nUnit=2, tstart='2015-01-01', tend='2021-12-31')
MODIS3, _, _, _ = reduceAnomaly(MODIS, reducer='median', nUnit=3, tstart='2015-01-01', tend='2021-12-31')
# %% Load exported data and clean it
data = pd.read_csv('/Volumes/GoogleDrive/My Drive/MODIS_Taal.csv')
data = data.drop(['system:index', 'reducer', '.geo'], axis=1).rename({'date':'time'},axis=1)
data['time'] = pd.to_datetime(data['time'])
data = data.drop_duplicates(subset=['id', 'time'])
# Replace masked values with NaN and forward-fill within each point id
# (a plain ffill would bleed values across id boundaries)
data = data.set_index(['id', 'time']).sort_index()
data[data == -9999] = np.nan
data = data.groupby(level='id').ffill()
data = data.reset_index().set_index('id')
# %% Rolling mean & plotting for comparison
id = 12
x, y = gdf.loc[id].geometry.x,gdf.loc[id].geometry.y
ts1 = ts_extract(MODIS1, x, y, 250)
ts2 = ts_extract(MODIS2, x, y, 250)
ts3 = ts_extract(MODIS3, x, y, 250)
# Window sizes of 4/8/12 samples span roughly 1/2/3 months, assuming the
# ~8-day cadence of the merged Terra+Aqua VI product
rol1 = data.loc[id].reset_index().set_index('time').rolling(4, win_type='gaussian', center=True).mean(std=3).reset_index()
rol2 = data.loc[id].reset_index().set_index('time').rolling(8, win_type='gaussian', center=True).mean(std=3).reset_index()
rol3 = data.loc[id].reset_index().set_index('time').rolling(12, win_type='gaussian', center=True).mean(std=3).reset_index()
fig, (ax1, ax2) = plt.subplots(2,figsize=[10,8])
ts1.plot(x='time',y='EVI', ax=ax1, label='1 month')
ts2.plot(x='time',y='EVI', ax=ax1, label='2 months')
ts3.plot(x='time',y='EVI', ax=ax1, label='3 months')
rol1.plot(x='time', y='EVI', ax=ax2, label='1 month')
rol2.plot(x='time', y='EVI', ax=ax2, label='2 months')
rol3.plot(x='time', y='EVI', ax=ax2, label='3 months')
#%% [markdown]
# Alternative to [[gist.GEE.RollingMean_Smoothing]] using a rolling window + weighted average computed client-side with a Gaussian kernel.
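# %%
# A minimal, self-contained sketch of that idea on synthetic data (names here
# are hypothetical, not part of the pipeline; assumes numpy/pandas imported as
# below). pandas' win_type windows do not support .apply(), so the QA-weighted
# Gaussian mean is built from two rolling sums: sum(w*x) / sum(w), with the
# Gaussian taper applied to both.
demo = pd.DataFrame({'EVI': np.sin(np.linspace(0, 6, 50)),
                     'w': np.random.choice([3.0, 2.0, 1.0], size=50)})  # QA-derived weights
num = (demo['EVI'] * demo['w']).rolling(5, win_type='gaussian', center=True).sum(std=3)
den = demo['w'].rolling(5, win_type='gaussian', center=True).sum(std=3)
demo['smooth'] = num / den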
# %%
from GEElt.collections import MODISVImerge, reduceAnomaly
from GEElt.ts import ts_extract
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
plt.style.use('ggplot')
import datetime as dt
from dateutil.relativedelta import *
import ee, eemont, geemap, wxee
ee.Initialize()
#%% Load Noa's data and create a FeatureCollection
df = pd.read_excel('/Users/seb/Documents/WORK/Projects/Taal/Noa/Survey_data.xlsx',header=1).set_index('id')
gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.x, df.y), crs=32651).to_crs(4326)
fc = geemap.geopandas_to_ee(gdf[['geometry']])
# %% Create a MODIS collection and export it
MODIS = MODISVImerge(bands=['EVI', 'NDVI', 'DetailedQA', 'SummaryQA'], tstart='2015-01-01', tend='2021-12-31')
ts = MODIS.getTimeSeriesByRegions(reducer=[ee.Reducer.mean(), ee.Reducer.median()],
                                  collection=fc,
                                  bands=['EVI', 'NDVI', 'DetailedQA', 'SummaryQA'],
                                  scale=250)
ee.batch.Export.table.toDrive(collection=ts, description='MODIS_Taal_QA', fileFormat='csv').start()
#%% Compute server-side 1-, 2- and 3-month median composites for comparison
MODIS1, _, _, _ = reduceAnomaly(MODIS, reducer='median', nUnit=1, tstart='2015-01-01', tend='2021-12-31')
MODIS2, _, _, _ = reduceAnomaly(MODIS, reducer='median', nUnit=2, tstart='2015-01-01', tend='2021-12-31')
MODIS3, _, _, _ = reduceAnomaly(MODIS, reducer='median', nUnit=3, tstart='2015-01-01', tend='2021-12-31')
# %% Load exported data and clean it
data = pd.read_csv('/Volumes/GoogleDrive/My Drive/MODIS_Taal_QA.csv')
data = data.drop(['system:index', 'reducer', '.geo', 'DetailedQA'], axis=1).rename({'date':'time'},axis=1)
data['time'] = pd.to_datetime(data['time'])
data = data.drop_duplicates(subset=['id', 'time'])
# Make (arbitrary) weights: higher = better QA (SummaryQA: 0 good, 1 marginal,
# 2 snow/ice, 3 cloudy)
wt = {0: 3,
      1: 2,
      2: 1,
      3: 1}
# Remap the weights onto the QA flags; unmapped values (masked/-9999) get a
# near-zero weight (NaN dict keys are unreliable in .map, hence the fillna)
data['SummaryQA'] = data['SummaryQA'].map(wt).fillna(1e-9)
# Replace masked values with NaN and forward-fill within each point id
data = data.set_index(['id', 'time']).sort_index()
data[data == -9999] = np.nan
data = data.groupby(level='id').ffill()
data = data.reset_index().set_index('id')
# %% Weighted rolling average, adapted from:
# https://stackoverflow.com/questions/47268976/pandas-rolling-weighted-average
# Note: win_type windows have no .apply(), so the weighted mean is built from
# two Gaussian rolling sums (see the sketch above)
def get_weighted_average(dataframe, window, columnname_data, columnname_weights):
    x = dataframe[columnname_data]
    w = dataframe[columnname_weights]
    num = (x * w).rolling(window, win_type='gaussian', center=True).sum(std=3)
    den = w.rolling(window, win_type='gaussian', center=True).sum(std=3)
    return (num / den).to_frame(columnname_data)
# %%
id = 12
x, y = gdf.loc[id].geometry.x,gdf.loc[id].geometry.y
# Weighted average
wa = data.loc[id].copy()
wa['wa1'] = get_weighted_average(wa,4,'EVI','SummaryQA')['EVI'].values
wa['wa2'] = get_weighted_average(wa,8,'EVI','SummaryQA')['EVI'].values
wa['wa3'] = get_weighted_average(wa,12,'EVI','SummaryQA')['EVI'].values
# Server-side mean
ts1 = ts_extract(MODIS1, x, y, 250)
ts2 = ts_extract(MODIS2, x, y, 250)
ts3 = ts_extract(MODIS3, x, y, 250)
# Client-side rolling
rol1 = data.loc[id].reset_index().set_index('time').rolling(4, win_type='gaussian', center=True).mean(std=3).reset_index()
rol2 = data.loc[id].reset_index().set_index('time').rolling(8, win_type='gaussian', center=True).mean(std=3).reset_index()
rol3 = data.loc[id].reset_index().set_index('time').rolling(12, win_type='gaussian', center=True).mean(std=3).reset_index()
# %%
fig, axs = plt.subplots(nrows=3, ncols=2,figsize=[20,8])
axs[0,0].set_title('Server-side')
data.loc[id].plot.scatter(x='time',y='EVI', c='k',s=1,ax=axs[0,0])
ts1.plot(x='time',y='EVI', ax=axs[0,0], label='1 month')
ts2.plot(x='time',y='EVI', ax=axs[0,0], label='2 months')
ts3.plot(x='time',y='EVI', ax=axs[0,0], label='3 months')
axs[1,0].set_title('Rolling')
data.loc[id].plot.scatter(x='time',y='EVI', c='k',s=1,ax=axs[1,0])
rol1.plot(x='time', y='EVI', ax=axs[1,0], label='1 month')
rol2.plot(x='time', y='EVI', ax=axs[1,0], label='2 months')
rol3.plot(x='time', y='EVI', ax=axs[1,0], label='3 months')
axs[2,0].set_title('Rolling+WA')
data.loc[id].plot.scatter(x='time',y='EVI', c='k',s=1,ax=axs[2,0])
wa.plot(x='time', y='wa1', ax=axs[2,0], label='1 month')
wa.plot(x='time', y='wa2', ax=axs[2,0], label='2 months')
wa.plot(x='time', y='wa3', ax=axs[2,0], label='3 months')
axs[0,1].set_title('1 month')
data.loc[id].plot.scatter(x='time',y='EVI', c='k',s=1,ax=axs[0,1])
ts1.plot(x='time',y='EVI', ax=axs[0,1], label='SS')
rol1.plot(x='time', y='EVI', ax=axs[0,1], label='Rol')
wa.plot(x='time', y='wa1', ax=axs[0,1], label='Rol+WA')
axs[1,1].set_title('2 months')
data.loc[id].plot.scatter(x='time',y='EVI', c='k',s=1,ax=axs[1,1])
ts2.plot(x='time',y='EVI', ax=axs[1,1], label='SS')
rol2.plot(x='time', y='EVI', ax=axs[1,1], label='Rol')
wa.plot(x='time', y='wa2', ax=axs[1,1], label='Rol+WA')
axs[2,1].set_title('3 months')
data.loc[id].plot.scatter(x='time',y='EVI', c='k',s=1,ax=axs[2,1])
ts3.plot(x='time',y='EVI', ax=axs[2,1], label='SS')
rol3.plot(x='time', y='EVI', ax=axs[2,1], label='Rol')
wa.plot(x='time', y='wa3', ax=axs[2,1], label='Rol+WA')
#%% [markdown]
# Using [[volcano.taal]] / [[projects.taal.vegetationImpact]], this gist demonstrates how to export VI data from MODIS with associated weights so they can be read as ASCII files in Timesat. The main conclusion is that ❗ **regular smoothing methods for time series in SE Asia mostly struggle!**
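#
# For reference, the files written at the end follow the Timesat ASCII layout
# (as I understand the format): one header line `nyears nptperyear nts`, then
# one row of `nyears * nptperyear` values per time series, e.g. (made-up values):
#
#     7 46 120
#     0.31 0.29 0.33 ...
#     0.42 0.40 0.44 ...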
# %%
from GEElt.collections import MODISVImerge
from geetools import bitreader
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
plt.style.use('ggplot')
import datetime as dt
from dateutil.relativedelta import *
import ee, eemont, geemap, wxee
ee.Initialize()
#%% Load Noa's data and create a FeatureCollection
df = pd.read_excel('/Users/seb/Documents/WORK/Projects/Taal/Noa/Survey_data.xlsx',header=1).set_index('id')
gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.x, df.y), crs=32651).to_crs(4326)
fc = geemap.geopandas_to_ee(gdf[['geometry']])
# %% Create a MODIS collection and export it
MODIS = MODISVImerge(bands=['EVI', 'NDVI', 'DetailedQA', 'SummaryQA'], tstart='2015-01-01', tend='2021-12-31')
ts = MODIS.getTimeSeriesByRegions(reducer=[ee.Reducer.mean(), ee.Reducer.median()],
                                  collection=fc,
                                  bands=['EVI', 'NDVI', 'DetailedQA', 'SummaryQA'],
                                  scale=250)
ee.batch.Export.table.toDrive(collection=ts, description='MODIS_Taal', fileFormat='csv').start()
# %% Load exported data and format it for Timesat
data = pd.read_csv('/Volumes/GoogleDrive/My Drive/MODIS_Taal.csv')
data = data.drop(['system:index', 'reducer', '.geo'], axis=1).rename({'date':'time'},axis=1)
data['time'] = pd.to_datetime(data['time'])
data['DetailedQA'] = data['DetailedQA'].astype(int)
data = data.drop_duplicates(subset=['id', 'time']).reset_index(drop=True)  # reset index so the positional .loc loop below works
# Set up the bit-reader to decode QA: bits 2-5 of DetailedQA (VI usefulness,
# 0 = highest quality ... 15 = not useful) are mapped to Timesat weights 1-16
options = {'2-5': {i: i + 1 for i in range(16)}}
reader = bitreader.BitReader(options, 16)
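# Quick check on a hypothetical value (relying on decode() returning the
# mapped classes, as used in the loop below):
# reader.decode(4)  # 4 = 0b100 -> bits 2-5 = 1 -> weight 2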
data['QA'] = data['DetailedQA'].copy()
# Loop through rows (slow!) and assign weights according to Timesat;
# if a -9999/NaN value is found, assign 16 (lowest quality)
for i in range(data.shape[0]):
    if data.loc[i, 'QA'] == -9999:
        data.loc[i, 'QA'] = 16
    else:
        data.loc[i, 'QA'] = reader.decode(data.loc[i, 'DetailedQA'])[0]
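# An equivalent vectorized sketch (bypasses BitReader by shifting bits 2-5
# out directly; assumes the same 1-16 mapping as the options dict above):
# qa = data['DetailedQA'].astype(int)
# data['QA'] = np.where(qa == -9999, 16, ((qa >> 2) & 0b1111) + 1)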
# Replace masked values with NaN and forward-fill within each point id
data = data.set_index(['id', 'time']).sort_index()
data[data == -9999] = np.nan
data = data.groupby(level='id').ffill()
data = data.reset_index().set_index('id')
# %% Format and write files to be used with Timesat
# Timesat parameters: number of years and observations per year
nyears = 7
tmp = data.loc[1].copy()
tmp['year'] = tmp['time'].dt.year
nptperyear = tmp.groupby('year').count().iloc[0, 0]
# One series per point id; identical rows (points sharing a MODIS pixel) are
# dropped from EVI, and NDVI/QA are aligned to the remaining index
EVI = data[['EVI', 'time']].pivot(columns='time').drop_duplicates()
NDVI = data[['NDVI', 'time']].pivot(columns='time').loc[EVI.index]
QA = data[['QA', 'time']].pivot(columns='time').loc[EVI.index]
nts = EVI.shape[0]
with open('/Users/seb/Documents/WORK/Projects/Taal/Timesat/EVI.txt', 'w') as f:
    f.write(f'{nyears} {nptperyear} {nts}\n')
EVI.to_csv('/Users/seb/Documents/WORK/Projects/Taal/Timesat/EVI.txt', sep=' ', mode='a', header=False, index=False)
with open('/Users/seb/Documents/WORK/Projects/Taal/Timesat/NDVI.txt', 'w') as f:
    f.write(f'{nyears} {nptperyear} {nts}\n')
NDVI.to_csv('/Users/seb/Documents/WORK/Projects/Taal/Timesat/NDVI.txt', sep=' ', mode='a', header=False, index=False)
with open('/Users/seb/Documents/WORK/Projects/Taal/Timesat/QA.txt', 'w') as f:
    f.write(f'{nyears} {nptperyear} {nts}\n')
QA.to_csv('/Users/seb/Documents/WORK/Projects/Taal/Timesat/QA.txt', sep=' ', mode='a', header=False, index=False)
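# Quick sanity check (assuming Timesat expects nyears * nptperyear values per
# series; the counts can differ slightly if a year is incomplete):
print(EVI.shape[1], nyears * nptperyear, nts)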