Skip to content

Instantly share code, notes, and snippets.

@YanCheng-go
Last active August 28, 2019 22:26
Show Gist options
  • Save YanCheng-go/82f8572a5ee6ad80db832c40cfe0aa83 to your computer and use it in GitHub Desktop.
Save YanCheng-go/82f8572a5ee6ad80db832c40cfe0aa83 to your computer and use it in GitHub Desktop.
pre-process satellite images (PS, Sentinel-2, MODIS)
from glob import glob
import numpy as np
import datetime as datetime
import os
import gdal
import netCDF4
import re
from numba import jit
import jdcal
from tqdm import tqdm_notebook
def cloud_mask(work_dir, date, output_dir):
    """Create cloud-mask rasters from NDVI GeoTIFFs using gdal_calc.py.

    For each acquisition date, every pixel with NDVI > -2 is set to 3
    (valid / cloud-free) and everything else to 0; the result is written
    as an LZW-compressed Float32 GeoTIFF named ``<date>_cloudMask.tif``.

    Parameters
    ----------
    work_dir : str
        Root folder containing an ``NDVI`` subfolder with
        ``<date>_NDVI.tif`` files (trailing backslash expected).
    date : iterable of str
        Acquisition-date identifiers, e.g. ``'20170307'``.
    output_dir : str
        Folder where the ``<date>_cloudMask.tif`` files are written
        (trailing backslash expected).
    """
    # NOTE: the former @jit decorator was removed — numba cannot compile
    # os.system/string work, so it added overhead without any speedup.
    gdal_calc_path = r'C:\Users\cheng\Anaconda3\Lib\site-packages\gdal-2.2.2-py3.6-win-amd64.egg-info\scripts\gdal_calc.py'
    # BUG FIX: the original template had no ``--outfile {2}`` placeholder,
    # so output_path was passed to .format() but silently ignored and the
    # mask was never written to the requested location.
    gdal_calc_str = ('python {0} --calc "3*(A>-2)" --co="COMPRESS=LZW" '
                     '--format GTiff --type Float32 -A {1} --A_band 1 '
                     '--outfile {2} --overwrite')
    for i in date:
        ndvi_path = work_dir + 'NDVI\\' + i + '_NDVI.tif'
        output_path = output_dir + i + '_cloudMask.tif'
        os.system(gdal_calc_str.format(gdal_calc_path, ndvi_path, output_path))
        print(output_path)
# # Cloud-free images
# date = ['20170307','20170310','20170526','20170707','20170712','20170718','20170827_1','20170827_2','20170918',
# '20170922_1','20170925','20171003']
# cloud_mask(work_dir = 'F:\\Kapiti\\PlanetScope\\output\\', date = date,
# output_dir = 'F:\\Kapiti\\PlanetScope\\output\\cloudMask\\')
# del date
## TODO: the mask should probably also exclude pixels with NDVI == -1 — confirm against the NDVI nodata convention
def remove_cloud(work_dir, name, date, output_dir):
    """Mask NDVI rasters with a cloud mask via gdal_calc.py.

    Output pixels keep the NDVI value where the mask band equals 3
    (cloud-free) and become NoData (0.0) everywhere else; results are
    LZW-compressed Float32 GeoTIFFs.

    Parameters
    ----------
    work_dir : str
        Root folder holding the ``name[0]`` (mask) and ``name[1]`` (NDVI)
        subfolders (trailing backslash expected).
    name : sequence of str
        Three labels: ``[mask_folder, ndvi_folder, output_label]``; input
        files are ``<date>_<label>.tif`` inside the matching subfolder.
    date : iterable of str
        Acquisition-date identifiers to process.
    output_dir : str
        Folder for ``<date>_<name[2]>.tif`` outputs; created if missing.
    """
    # NOTE: the former @jit decorator was removed — numba cannot compile
    # os.system/string work, so it added overhead without any speedup.
    gdal_calc_path = r'C:\Users\cheng\Anaconda3\Lib\site-packages\gdal-2.2.2-py3.6-win-amd64.egg-info\scripts\gdal_calc.py'
    gdal_calc_str = ('python {0} --calc "A*(B==3)" --co="COMPRESS=LZW" '
                     '--format GTiff --type Float32 --NoDataValue 0.0 '
                     '-A {1} --A_band 1 -B {2} --B_band 1 '
                     '--outfile {3} --overwrite')
    # Replaces the race-prone ``if os.path.exists(...) == False: os.mkdir``
    # check inside the loop; makedirs is idempotent and runs once.
    os.makedirs(output_dir, exist_ok=True)
    for i in date:
        cloud_mask_path = work_dir + name[0] + '\\' + i + '_' + name[0] + '.tif'
        ndvi_path = work_dir + name[1] + '\\' + i + '_' + name[1] + '.tif'
        output_path = output_dir + '\\' + i + '_' + name[2] + '.tif'
        os.system(gdal_calc_str.format(gdal_calc_path, ndvi_path,
                                       cloud_mask_path, output_path))
        print(output_path)
# date = ['20170303','20170307','20170310','20170314','20170316','20170318','20170404','20170410','20170413',
# '20170414','20170415','20170420','20170423','20170424','20170425','20170429','20170503','20170504',
# '20170507','20170509_1','20170509_2','20170514','20170517','20170520','20170526','20170528','20170606',
# '20170612','20170614_1','20170614_2','20170615','20170616','20170621','20170624_1','20170624_2',
# '20170702_1','20170702_2','20170707','20170708_1','20170708_2','20170711_1','20170711_2','20170712',
# '20170716','20170718','20170719','20170720_1','20170720_2','20170730_1','20170730_2','20170813',
# '20170815_1','20170815_2','20170816','20170818_1','20170818_2','20170819','20170820','20170825',
# '20170827_1','20170827_2','20170830','20170918','20170922_1','20170922_2','20170925','20171003']
# include cloud-free images, exclude duplicated images
# date = ['20170303','20170307','20170310','20170314','20170316','20170318','20170404','20170410','20170413','20170414',
# '20170415','20170420','20170423','20170424','20170425','20170429','20170503','20170504','20170507','20170514',
# '20170517','20170520','20170526','20170528','20170606','20170612','20170615','20170616','20170621','20170707',
# '20170712','20170716','20170718','20170719','20170813','20170816','20170819','20170820','20170825','20170830',
# '20170918','20170925','20171003']
# remove_cloud(work_dir = 'F:\\Kapiti\\PlanetScope\\output\\', date = date,
# output_dir = 'F:\\Kapiti\\PlanetScope\\output\\NDVI_cloudMask\\')
# del date
# dup_date = ['20170509','20170614','20170624','20170702','20170708','20170711','20170720','20170730','20170815',
# '20170818','20170827','20170922']
# date = [i + '_1' for i in dup_date] + [i + '_2' for i in dup_date]
# remove_cloud(work_dir = 'F:\\Kapiti\\PlanetScope\\output\\', date = date,
# output_dir = 'F:\\Kapiti\\PlanetScope\\output\\NDVI_cloudMask_dup\\')
# del date, dup_date
# modified cloudMask
# date = ['20170410','20170420','20170423','20170425']
# remove_cloud(work_dir = 'F:\\Kapiti\\PlanetScope\\output\\', date = date,
# output_dir = 'F:\\Kapiti\\PlanetScope\\output\\NDVI_cloudMask_modified\\')
# del date
# date = ['20170303','20170307','20170310','20170314','20170316','20170318','20170404','20170410','20170413',
# '20170414','20170415','20170420','20170423','20170424','20170425','20170429','20170503','20170504',
# '20170507','20170509_1','20170509_2','20170514','20170517','20170520','20170526','20170528','20170606',
# '20170612','20170614_1','20170614_2','20170615','20170616','20170621','20170624_1','20170624_2',
# '20170702_1','20170702_2','20170707','20170708_1','20170708_2','20170711_1','20170711_2','20170712',
# '20170716','20170718','20170719','20170720_1','20170720_2','20170730_1','20170730_2','20170813',
# '20170815_1','20170815_2','20170816','20170818_1','20170818_2','20170819','20170820','20170825',
# '20170827_1','20170827_2','20170830','20170918','20170922_1','20170922_2','20170925','20171003']
# remove_cloud(work_dir = 'F:\\Kapiti\\PlanetScope\\output\\', name = ['cloudMask_Oct','NDVI','NDVI_cloudMask_Oct'], date = date,
# output_dir = 'F:\\Kapiti\\PlanetScope\\output\\NDVI_cloudMask_Oct\\')
# before use get maximum, use arcgis to set nodata to 0 remember to select compression method to LZW
def get_maximum(input_path, date, output_dir, name):
    """Per-pixel maximum of duplicate same-day acquisitions via gdal_calc.py.

    For each date with two takes (file suffixes ``_1`` and ``_2``), computes
    ``maximum(A, B)`` band-wise and writes ``<date>_<name>.tif`` as an
    LZW-compressed Float32 GeoTIFF with NoData 0.0.

    Parameters
    ----------
    input_path : str
        Folder holding ``<date>_1_<name>.tif`` / ``<date>_2_<name>.tif``
        (trailing backslash expected).
    date : sequence of str
        Dates that have duplicate acquisitions.
    output_dir : str
        Folder for the merged outputs (trailing backslash expected).
    name : str
        File-name label, e.g. ``'NDVI_cloudMask'``.
    """
    gdal_calc_path = r'C:\Users\cheng\Anaconda3\Lib\site-packages\gdal-2.2.2-py3.6-win-amd64.egg-info\scripts\gdal_calc.py'
    gdal_calc_str = ('python {0} --calc "maximum(A,B)" --format GTiff '
                     '--co="COMPRESS=LZW" --type Float32 --NoDataValue 0.0 '
                     '-A {1} --A_band 1 -B {2} --B_band 1 '
                     '--outfile {3} --overwrite')
    # FIX: the original built ``f = list(map(main, ...))`` purely for side
    # effects, materializing a throwaway list of Nones bound to an unused
    # variable; a plain loop over the progress-bar iterator is equivalent.
    for i in tqdm_notebook(date, total=len(date), desc='get_maximum'):
        path_a = input_path + i + '_1_' + name + '.tif'
        path_b = input_path + i + '_2_' + name + '.tif'
        output_path = output_dir + i + '_' + name + '.tif'
        os.system(gdal_calc_str.format(gdal_calc_path, path_a, path_b,
                                       output_path))
# dup_date = ['20171007','20171124','20180530','20180611','20180803','20180812','20180908','20180912','20180930']
# get_maximum(input_path = 'F:\\Kapiti\\PlanetScope\\output\\NDVI_cloudMask_all_0\\', date = dup_date,
# output_dir = 'F:\\Kapiti\\PlanetScope\\output\\NDVI_cloudMask_all\\', name = 'NDVI_cloudMask')
# # new
# dup_date = ['20170307', '20170509', '20170614', '20170624', '20170702', '20170708', '20170711', '20170712', '20170720', '20170730', '20170813', '20170815', '20170818', '20170825', '20170827', '20170918', '20170922', '20171007', '20171124', '20171215', '20180226', '20180308', '20180530', '20180611', '20180716', '20180717', '20180802', '20180803', '20180812', '20180908', '20180912', '20180930']
# dup_date = ['20170922']
# get_maximum(input_path = 'D:\\Kapiti\\', date = dup_date,
# output_dir = 'D:\\Kapiti\\test\\', name = 'NDVI_cloudMask_large')
# # set 0 to nodata
# input_dir = 'F:\\Kapiti\\PlanetScope\\output\\clip_extend_2018_Sep\\'
# output_dir = 'F:\\Kapiti\\PlanetScope\\output\\null\\'
# file = glob("{}*.tif".format(input_dir))
# for i in file:
# input_file_path = i
# output_file_path = output_dir + i.split('\\')[-1]
# gdal_translate_path = 'C:/Users/cheng/Anaconda3/pkgs/libgdal-2.2.2-h309aa3f_1/Library/bin/gdal_translate.exe'
# gdal_translate_str = '{0} -a_nodata 0.0 -ot UInt16 -of GTiff {1} {2}'
# gdal_translate_process = gdal_translate_str.format(gdal_translate_path, input_file_path, output_file_path)
# print('Start!')
# os.system(gdal_translate_process)
# print('Next...' + input_file_path)
# print('Done!')
def tif_to_netCDF(input_dir, output_dir, output_name, date, input_suffix):
    """Stack per-date NDVI/DOY/QA GeoTIFFs into one chunked NetCDF file.

    Expects ``<input_dir>\\NDVI``, ``<input_dir>\\DOY`` and
    ``<input_dir>\\QA`` subfolders, each holding files named
    ``YYYY_MM_DD.tif``. Time is stored as fractional days since
    2017-03-03; x/y are plain column/row indices, not projected
    coordinates (the projected-coordinate computation was deliberately
    commented out in the original).

    Parameters
    ----------
    input_dir : str
        Root folder containing the NDVI/DOY/QA subfolders.
    output_dir : str
        Folder for the resulting NetCDF file.
    output_name : str
        File name of the NetCDF output.
    date : list of str
        Acquisition dates as ``'YYYYMMDD'`` strings (optionally with a
        trailing suffix; only the first 8 characters are parsed).
    input_suffix : str
        Unused; kept for backward compatibility with existing callers.
    """
    print('Start!')
    # BUG FIX: the template raster was globbed from the hard-coded local
    # path r'D:\Kapiti\MODIS\NDVI', ignoring the input_dir parameter.
    file_ = glob("{}\\*.tif".format(input_dir + '\\NDVI'))
    # Use the first raster as the grid template for dimensions.
    ds = gdal.Open(file_[0])
    a = ds.ReadAsArray()
    ny, nx = np.shape(a)
    # Coordinates are stored as simple row/column indices (see docstring).
    x = range(nx)
    y = range(ny)
    # Reference date for the 'time' axis.
    basedate = datetime.datetime(2017, 3, 3, 0, 0, 0)
    nco = netCDF4.Dataset(output_dir + '\\' + output_name, 'w', clobber=True)
    # Chunking is optional but can improve access patterns a lot; see
    # http://www.unidata.ucar.edu/blogs/developer/entry/chunking_data_choosing_shapes
    chunk_x = 16
    chunk_y = 16
    chunk_time = 12
    nco.createDimension('x', nx)
    nco.createDimension('y', ny)
    nco.createDimension('time', None)  # unlimited
    timeo = nco.createVariable('time', 'f4', ('time'))
    timeo.units = 'days since 2017-03-03 00:00:00'
    timeo.standard_name = 'time'
    xo = nco.createVariable('x', 'u4', ('x'))
    xo.units = 'm'
    xo.standard_name = 'number_of_column'
    yo = nco.createVariable('y', 'u4', ('y'))
    yo.units = 'm'
    yo.standard_name = 'number_of_row'
    tmno = nco.createVariable('NDVI', 'f4', ('time', 'y', 'x'),
                              zlib=True,
                              chunksizes=[chunk_time, chunk_y, chunk_x],
                              fill_value=-9999, least_significant_digit=5)
    tmno.standard_name = 'NDVI'
    tmno_DOY = nco.createVariable('DOY', 'u4', ('time', 'y', 'x'),
                                  zlib=True,
                                  chunksizes=[chunk_time, chunk_y, chunk_x],
                                  fill_value=-9999)
    tmno_DOY.standard_name = 'DOY'
    tmno_QA = nco.createVariable('QA', 'u4', ('time', 'y', 'x'),
                                 zlib=True,
                                 chunksizes=[chunk_time, chunk_y, chunk_x],
                                 fill_value=-9999)
    tmno_QA.standard_name = 'Quality flag'
    nco.Conventions = 'CF-1.6'
    xo[:] = x
    yo[:] = y

    def _write_slice(idx):
        # Write one time slice: parse the date from the identifier, set the
        # time coordinate, then copy the NDVI/DOY/QA rasters for that date.
        i = date[idx]
        year, mon, day = int(i[0:4]), int(i[4:6]), int(i[6:8])
        date_ = datetime.datetime(year, mon, day, 0, 0, 0)
        # Fractional days since basedate. Using idx directly fixes the
        # original's date.index(i), which returned the FIRST occurrence
        # and would clobber earlier slices if a date ever repeated.
        timeo[idx] = (date_ - basedate).total_seconds() / 86400.
        tif_name = '{}_{}_{}.tif'.format(i[0:4], i[4:6], i[6:8])
        print(input_dir + '\\' + 'NDVI\\' + tif_name)
        for var, sub in ((tmno, 'NDVI'), (tmno_DOY, 'DOY'), (tmno_QA, 'QA')):
            src = gdal.Open(input_dir + '\\' + sub + '\\' + tif_name)
            var[idx, :, :] = src.ReadAsArray()

    # BUG FIX: the original passed the bare int len(date) to tqdm_notebook,
    # which then tried to iterate an integer and raised TypeError; iterate
    # the index range instead.
    for idx in tqdm_notebook(range(len(date)), total=len(date),
                             unit='file', desc='tif_to_netCDF'):
        _write_slice(idx)
    print('Done!')
    print('Check your output: ' + output_dir)
    nco.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment