Skip to content

Instantly share code, notes, and snippets.

@jkatagi
Last active April 25, 2023 03:28
Show Gist options
  • Star 20 You must be signed in to star a gist
  • Fork 7 You must be signed in to fork a gist
  • Save jkatagi/a1207eee32463efd06fb57676dcf86c8 to your computer and use it in GitHub Desktop.
Save jkatagi/a1207eee32463efd06fb57676dcf86c8 to your computer and use it in GitHub Desktop.
Read GeoTiff and convert numpy.array to GeoTiff.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os.path
import re
from osgeo import gdal
from osgeo import gdal_array
from osgeo import osr
def get_gain_band(input_file):
"""get GAIN_BAND from meta file (*.tif.txt)"""
# define file name of *.tif.txt
ifile_txt = re.sub(r'.tif', '.tif.txt', input_file)
ld = open(ifile_txt)
lines = ld.readlines()
ld.close()
gain_band = []
for line in lines:
if line.find("GAIN_BAND") >= 0:
gain_band.append(float((re.split(' ', line)[1]).strip()))
return gain_band
def tif2array(input_file, calc_gain=True):
"""
read GeoTiff and convert to numpy.ndarray.
Inputs:
input_file (str) : the name of input GeoTiff file.
calc_gain (bool) : wheter calc GAIN to DN or not (defaul:True).
return:
image(np.array) : image for each bands
dataset : for gdal's data drive.
"""
dataset = gdal.Open(input_file, gdal.GA_ReadOnly)
# Allocate our array using the first band's datatype
image_datatype = dataset.GetRasterBand(1).DataType
image = np.zeros((dataset.RasterYSize, dataset.RasterXSize, dataset.RasterCount),
dtype=float)
if calc_gain == True:
# get gain
gain = get_gain_band(input_file)
# Loop over all bands in dataset
for b in range(dataset.RasterCount):
# Remember, GDAL index is on 1, but Python is on 0 -- so we add 1 for our GDAL calls
band = dataset.GetRasterBand(b + 1)
# Read in the band's data into the third dimension of our array
if calc_gain == True:
# calc gain value for each bands
image[:, :, b] = band.ReadAsArray() * gain[b]
else:
image[:, :, b] = band.ReadAsArray()
return image, dataset
def read_training_data(training_data, input_file, band_num):
"""read training data.
input: training_data ... file name of training_data
input_file ... file name of input img
"""
df=pd.read_table(training_data ,sep=',', header=None)
scene = os.path.basename(input_file)
# get only training data for input image.
# we assume last columns is scene_name. so df.iloc[,-1].
training_data_dataframe = df[ df.iloc[:,-1].str.contains(scene)]
# convert np.array
training_data=training_data_dataframe.as_matrix()
# category
category_label = np.array(training_data[:,1], dtype=int)
#feature: np.array(band1_gain, band2_gain, band3_gain, band4_gain..., bandn_gain)
feature = np.array(training_data[:,5:5+band_num], dtype=float)
return category_label, feature
def array2raster(newRasterfn, dataset, array, dtype):
"""
save GTiff file from numpy.array
input:
newRasterfn: save file name
dataset : original tif file
array : numpy.array
dtype: Byte or Float32.
"""
cols = array.shape[1]
rows = array.shape[0]
originX, pixelWidth, b, originY, d, pixelHeight = dataset.GetGeoTransform()
driver = gdal.GetDriverByName('GTiff')
# set data type to save.
GDT_dtype = gdal.GDT_Unknown
if dtype == "Byte":
GDT_dtype = gdal.GDT_Byte
elif dtype == "Float32":
GDT_dtype = gdal.GDT_Float32
else:
print("Not supported data type.")
# set number of band.
if array.ndim == 2:
band_num = 1
else:
band_num = array.shape[2]
outRaster = driver.Create(newRasterfn, cols, rows, band_num, GDT_dtype)
outRaster.SetGeoTransform((originX, pixelWidth, 0, originY, 0, pixelHeight))
# Loop over all bands.
for b in range(band_num):
outband = outRaster.GetRasterBand(b + 1)
# Read in the band's data into the third dimension of our array
if band_num == 1:
outband.WriteArray(array)
else:
outband.WriteArray(array[:,:,b])
# setteing srs from input tif file.
prj=dataset.GetProjection()
outRasterSRS = osr.SpatialReference(wkt=prj)
outRaster.SetProjection(outRasterSRS.ExportToWkt())
outband.FlushCache()
@jkatagi
Copy link
Author

jkatagi commented May 25, 2017

I should add masking process (e.g. cloud masking, shadow masking).
This is because after multiply GAIN value, we cannot distinguish whether it is cloud or not any more.

Copy link

ghost commented Jun 1, 2019

Hey Buddy. this is AWESOME. You just saved me.
whole this creating a tif from an array is really a pain.
Thanks <3

@laich91
Copy link

laich91 commented Apr 10, 2020

Great jobs, very helpful, thank you!!!

@sameerCoder
Copy link

Hi,
can u help me in converting numpy to geotiff,
i am nwtogeotiff soi dont know how to proceed ,
I Will really appreciate ur help
detail u can find in below link,

https://stackoverflow.com/questions/64628207/convert-png-to-geotiff-using-python-programming

Thank you

@EqiLuo
Copy link

EqiLuo commented Dec 11, 2020

Thank you soooo much dude! This code is brilliant. Helps a lot

@rdvelazquez
Copy link

Super helpful. Thanks for sharing!

@kapasi1234
Copy link

thanx for sharing but if it had documentation with it then it would have been more helpfull.

@Gedeon-m-gedus
Copy link

Super useful, Thank you

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment