jkatagi/tif_tools.py

## tif_tools.py
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os.path
import re

from osgeo import gdal
from osgeo import gdal_array
from osgeo import osr

def get_gain_band(input_file):
    """get GAIN_BAND from meta file (*.tif.txt)"""
     # define file name of *.tif.txt
    ifile_txt = re.sub(r'.tif', '.tif.txt', input_file)
    ld = open(ifile_txt)
    lines = ld.readlines()
    ld.close()

    gain_band = []
    for line in lines:
        if line.find("GAIN_BAND") >= 0:
             gain_band.append(float((re.split(' ', line)[1]).strip()))
    return gain_band

def tif2array(input_file, calc_gain=True):
    """
    read GeoTiff and convert to numpy.ndarray.

    Inputs:
        input_file (str) : the name of input GeoTiff file.
        calc_gain (bool) : wheter calc GAIN to DN  or not (defaul:True).
    return:
        image(np.array) : image for each bands
        dataset : for gdal's data drive.
    """
    dataset = gdal.Open(input_file, gdal.GA_ReadOnly)
    # Allocate our array using the first band's datatype
    image_datatype = dataset.GetRasterBand(1).DataType
    image = np.zeros((dataset.RasterYSize, dataset.RasterXSize, dataset.RasterCount),
                     dtype=float)

    if calc_gain == True:
        # get gain
        gain = get_gain_band(input_file)

    # Loop over all bands in dataset
    for b in range(dataset.RasterCount):
        # Remember, GDAL index is on 1, but Python is on 0 -- so we add 1 for our GDAL calls
        band = dataset.GetRasterBand(b + 1)
        # Read in the band's data into the third dimension of our array
        if calc_gain == True:
            # calc gain value for each bands
            image[:, :, b] = band.ReadAsArray() * gain[b]
        else:
            image[:, :, b] = band.ReadAsArray()
    return image, dataset

def read_training_data(training_data, input_file, band_num):
    """read training data.
    input: training_data ... file name of training_data
          input_file ... file name of input img
    """
    df=pd.read_table(training_data ,sep=',', header=None)
    scene = os.path.basename(input_file)
    # get only training data for input image.
    # we assume last columns is scene_name. so df.iloc[,-1].
    training_data_dataframe = df[ df.iloc[:,-1].str.contains(scene)]
    # convert np.array
    training_data=training_data_dataframe.as_matrix()
    # category
    category_label = np.array(training_data[:,1], dtype=int)
    #feature: np.array(band1_gain, band2_gain, band3_gain, band4_gain..., bandn_gain)
    feature = np.array(training_data[:,5:5+band_num], dtype=float)

    return category_label, feature
def array2raster(newRasterfn, dataset, array, dtype):
    """
    save GTiff file from numpy.array
    input:
        newRasterfn: save file name
        dataset : original tif file
        array : numpy.array
        dtype: Byte or Float32.
    """
    cols = array.shape[1]
    rows = array.shape[0]
    originX, pixelWidth, b, originY, d, pixelHeight = dataset.GetGeoTransform()

    driver = gdal.GetDriverByName('GTiff')

    # set data type to save.
    GDT_dtype = gdal.GDT_Unknown
    if dtype == "Byte":
        GDT_dtype = gdal.GDT_Byte
    elif dtype == "Float32":
        GDT_dtype = gdal.GDT_Float32
    else:
        print("Not supported data type.")

    # set number of band.
    if array.ndim == 2:
        band_num = 1
    else:
        band_num = array.shape[2]

    outRaster = driver.Create(newRasterfn, cols, rows, band_num, GDT_dtype)
    outRaster.SetGeoTransform((originX, pixelWidth, 0, originY, 0, pixelHeight))

    # Loop over all bands.
    for b in range(band_num):
        outband = outRaster.GetRasterBand(b + 1)
        # Read in the band's data into the third dimension of our array
        if band_num == 1:
            outband.WriteArray(array)
        else:
            outband.WriteArray(array[:,:,b])

    # setteing srs from input tif file.
    prj=dataset.GetProjection()
    outRasterSRS = osr.SpatialReference(wkt=prj)
    outRaster.SetProjection(outRasterSRS.ExportToWkt())
    outband.FlushCache()
	import numpy as np
	import matplotlib.pyplot as plt
	import pandas as pd
	import os.path
	import re

	from osgeo import gdal
	from osgeo import gdal_array
	from osgeo import osr

	def get_gain_band(input_file):
	"""get GAIN_BAND from meta file (*.tif.txt)"""
	# define file name of *.tif.txt
	ifile_txt = re.sub(r'.tif', '.tif.txt', input_file)
	ld = open(ifile_txt)
	lines = ld.readlines()
	ld.close()

	gain_band = []
	for line in lines:
	if line.find("GAIN_BAND") >= 0:
	gain_band.append(float((re.split(' ', line)[1]).strip()))
	return gain_band

	def tif2array(input_file, calc_gain=True):
	"""
	read GeoTiff and convert to numpy.ndarray.

	Inputs:
	input_file (str) : the name of input GeoTiff file.
	calc_gain (bool) : wheter calc GAIN to DN or not (defaul:True).
	return:
	image(np.array) : image for each bands
	dataset : for gdal's data drive.
	"""
	dataset = gdal.Open(input_file, gdal.GA_ReadOnly)
	# Allocate our array using the first band's datatype
	image_datatype = dataset.GetRasterBand(1).DataType
	image = np.zeros((dataset.RasterYSize, dataset.RasterXSize, dataset.RasterCount),
	dtype=float)

	if calc_gain == True:
	# get gain
	gain = get_gain_band(input_file)

	# Loop over all bands in dataset
	for b in range(dataset.RasterCount):
	# Remember, GDAL index is on 1, but Python is on 0 -- so we add 1 for our GDAL calls
	band = dataset.GetRasterBand(b + 1)
	# Read in the band's data into the third dimension of our array
	if calc_gain == True:
	# calc gain value for each bands
	image[:, :, b] = band.ReadAsArray() * gain[b]
	else:
	image[:, :, b] = band.ReadAsArray()
	return image, dataset

	def read_training_data(training_data, input_file, band_num):
	"""read training data.
	input: training_data ... file name of training_data
	input_file ... file name of input img
	"""
	df=pd.read_table(training_data ,sep=',', header=None)
	scene = os.path.basename(input_file)
	# get only training data for input image.
	# we assume last columns is scene_name. so df.iloc[,-1].
	training_data_dataframe = df[ df.iloc[:,-1].str.contains(scene)]
	# convert np.array
	training_data=training_data_dataframe.as_matrix()
	# category
	category_label = np.array(training_data[:,1], dtype=int)
	#feature: np.array(band1_gain, band2_gain, band3_gain, band4_gain..., bandn_gain)
	feature = np.array(training_data[:,5:5+band_num], dtype=float)

	return category_label, feature
	def array2raster(newRasterfn, dataset, array, dtype):
	"""
	save GTiff file from numpy.array
	input:
	newRasterfn: save file name
	dataset : original tif file
	array : numpy.array
	dtype: Byte or Float32.
	"""
	cols = array.shape[1]
	rows = array.shape[0]
	originX, pixelWidth, b, originY, d, pixelHeight = dataset.GetGeoTransform()

	driver = gdal.GetDriverByName('GTiff')

	# set data type to save.
	GDT_dtype = gdal.GDT_Unknown
	if dtype == "Byte":
	GDT_dtype = gdal.GDT_Byte
	elif dtype == "Float32":
	GDT_dtype = gdal.GDT_Float32
	else:
	print("Not supported data type.")

	# set number of band.
	if array.ndim == 2:
	band_num = 1
	else:
	band_num = array.shape[2]

	outRaster = driver.Create(newRasterfn, cols, rows, band_num, GDT_dtype)
	outRaster.SetGeoTransform((originX, pixelWidth, 0, originY, 0, pixelHeight))

	# Loop over all bands.
	for b in range(band_num):
	outband = outRaster.GetRasterBand(b + 1)
	# Read in the band's data into the third dimension of our array
	if band_num == 1:
	outband.WriteArray(array)
	else:
	outband.WriteArray(array[:,:,b])

	# setteing srs from input tif file.
	prj=dataset.GetProjection()
	outRasterSRS = osr.SpatialReference(wkt=prj)
	outRaster.SetProjection(outRasterSRS.ExportToWkt())
	outband.FlushCache()