# grovduck/esri_knn.py

## esri_knn.py
class FeatureSpace(object):
    """Base class for a feature space that is searched with k-NN.

    Subclasses supply ``train`` (build the space and fit
    ``knn_classifier``) and ``transform`` (project raw values into the
    space).  ``__init__`` stores ``max_k`` and creates
    ``KNeighborsClassifier(max_k)`` as ``knn_classifier``.
    """

    def __init__(self, max_k):
        self.max_k = max_k
        self.knn_classifier = KNeighborsClassifier(max_k)

    def train(self, *args, **kwargs):
        """Build the feature space; subclasses must override this.

        Arbitrary arguments are accepted so that the base signature is
        compatible with the subclass overrides, which take data frames
        and an ``id_field`` keyword.

        Raises:
            NotImplementedError: always, in the base class.
        """
        raise NotImplementedError

    def transform(self, raw_scores):
        """Project ``raw_scores`` into the space; subclasses must override.

        Raises:
            NotImplementedError: always, in the base class.
        """
        raise NotImplementedError

    def k_neighbors(self, scores):
        """Return ``knn_classifier.kneighbors(...)`` for ``scores``.

        ``kneighbors`` expects one row per query.  A single 1-D score
        vector (what a per-pixel query produces) is therefore promoted
        to a one-row 2-D array; input that is already 2-D is passed
        through untouched.
        """
        if np.ndim(scores) == 1:
            scores = np.reshape(scores, (1, -1))
        return self.knn_classifier.kneighbors(scores)

class CCAFeatureSpace(FeatureSpace):
    """Feature space built from a CCA of species and environmental data.

    The ordination itself is delegated to ``NumpyCCA``; this class keeps
    the resulting coefficients and site LC scores and fits the inherited
    ``knn_classifier`` on them.
    """

    def __init__(self, max_k, n_axes, spp_transform=None):
        # FeatureSpace.__init__ takes max_k only; n_axes is kept here.
        super(CCAFeatureSpace, self).__init__(max_k)
        self.n_axes = n_axes
        self.spp_transform = spp_transform

    def train(self, species_df, environmental_df, id_field='ID'):
        """Run the CCA and fit the classifier on the plot scores.

        Args:
            species_df: data frame of species values plus ``id_field``.
            environmental_df: data frame of environmental values plus
                ``id_field``.
            id_field: name of the plot ID column present in both frames.

        NOTE(review): rows of the two frames are presumably in the same
        plot order -- nothing here aligns them; confirm with the caller.
        """
        # Here's where you run NumpyCCA
        # Set a bunch of self variables that define the space
        # Columns are looked up by indexing the frame, not via __dict__.
        self.fcids = np.array(species_df[id_field])
        spp_arr = np.array(species_df.drop(columns=[id_field]))
        env_arr = np.array(environmental_df.drop(columns=[id_field]))
        cca_obj = NumpyCCA(spp_arr, env_arr)
        self.coefficients = cca_obj.coefficients()
        self.lc_scores = cca_obj.site_lc_scores()
        # ...

        # At the end, set the plot_scores on the knn_classifier
        # NOTE(review): the site LC scores are used as the plot scores;
        # restricting/weighting them to ``n_axes`` is still to be done.
        plot_scores = self.lc_scores
        self.knn_classifier.fit(plot_scores, self.fcids)

    def transform(self, raw_scores):
        """Project raw environmental values into the CCA space."""
        # This is function transform_env_score in current code
        return transform_env_score(raw_scores)

    def k_neighbors(self, scores):
        """Return ``(distances, plot_ids)`` for the first query row.

        The classifier returns row indices into the training data; they
        are mapped back to plot IDs through ``self.fcids``.
        """
        dist, ind = super(CCAFeatureSpace, self).k_neighbors(scores)
        neighbor_ids = self.fcids[ind[0]]
        return dist[0], neighbor_ids


class EuclideanFeatureSpace(FeatureSpace):
    """Feature space whose axes are standardized environmental variables."""

    def __init__(self, max_k):
        super(EuclideanFeatureSpace, self).__init__(max_k)

    def train(self, environmental_df, id_field='ID'):
        """Standardize the environmental variables and fit the classifier.

        Args:
            environmental_df: data frame of environmental values plus
                ``id_field``.
            id_field: name of the plot ID column.

        The per-variable mean and standard deviation are stored as
        ``env_means`` / ``env_stds`` so ``transform`` applies exactly the
        same scaling to new values.
        """
        # Columns are looked up by indexing the frame, not via __dict__.
        self.fcids = np.array(environmental_df[id_field])
        # Here's where you'd set up the axes based on normalized
        # environmental variables, like in our first example
        env_arr = np.array(
            environmental_df.drop(columns=[id_field]), dtype=float
        )
        self.env_means = env_arr.mean(axis=0)
        stds = env_arr.std(axis=0)
        # A constant variable has zero spread; divide by 1 instead so its
        # standardized score is 0 rather than NaN/inf.
        self.env_stds = np.where(stds == 0, 1.0, stds)
        plot_scores = (env_arr - self.env_means) / self.env_stds

        # At the end, set the plot_scores on the knn_classifier
        self.knn_classifier.fit(plot_scores, self.fcids)

    def transform(self, raw_scores):
        """Standardize ``raw_scores`` with the statistics from ``train``."""
        # Standardize environmental variables
        raw_scores = np.asarray(raw_scores, dtype=float)
        return (raw_scores - self.env_means) / self.env_stds

    def k_neighbors(self, scores):
        """Return ``(distances, plot_ids)`` for the first query row."""
        # TODO: Redundant with CCA because self.fcids is member
        # of subclasses
        dist, ind = super(EuclideanFeatureSpace, self).k_neighbors(scores)
        neighbor_ids = self.fcids[ind[0]]
        return dist[0], neighbor_ids


# A class to represent a stack (bands) of rasters that is capable
# of imputation and attribution
class RasterList(object):
    """A stack (bands) of rasters that supports imputation and attribution.

    ``impute`` stores its results on the instance as ``distances`` and
    ``neighbors``.
    """

    def __init__(self, rasters):
        # Build the raster list (copied so later changes to the caller's
        # sequence do not affect the stack)
        self.raster_list = list(rasters)

    def extract(self, point_feature_class):
        """Extract spatial signatures at plot locations.

        Intended to return a ``pandas.DataFrame`` of environmental
        signatures.  The extraction itself has not been written yet, so
        this raises explicitly rather than failing on an undefined name.

        Raises:
            NotImplementedError: always, until extraction is implemented.
        """
        raise NotImplementedError('RasterList.extract is not implemented')

    def impute(self, feature_space_obj, pix_array=None):
        """Find neighbors and distances for every pixel of the stack.

        This is essentially the updatePixels loop.

        Args:
            feature_space_obj: object providing ``transform(raw)`` and
                ``k_neighbors(scores)`` returning ``(distances, ids)``.
            pix_array: optional array-like indexed as ``[band, x, y]``.
                When omitted it is built by stacking the rasters.

        Results are stored in ``self.distances`` and ``self.neighbors``,
        both shaped ``(k, x, y)`` where ``k`` is the number of neighbors
        returned per pixel.

        Raises:
            ValueError: if the pixel array is not 3-D.
        """
        if pix_array is None:
            # NOTE(review): assumes every raster converts to a 2-D array
            # of identical shape via np.asarray -- confirm for the raster
            # type actually used.
            pix_array = np.stack([np.asarray(r) for r in self.raster_list])
        pix_array = np.asarray(pix_array)
        if pix_array.ndim != 3:
            raise ValueError(
                'pix_array must be 3-D (band, x, y); got %d-D'
                % pix_array.ndim
            )
        num_squares_x, num_squares_y = pix_array.shape[1:]

        # Allocated on the first pixel, once k and the dtypes are known.
        distances = None
        neighbors = None
        for num_x in range(num_squares_x):
            for num_y in range(num_squares_y):
                raw_score = pix_array[:, num_x, num_y]
                transformed_score = feature_space_obj.transform(raw_score)
                dist, ids = feature_space_obj.k_neighbors(transformed_score)
                dist = np.asarray(dist)
                ids = np.asarray(ids)
                if distances is None:
                    shape = (len(dist), num_squares_x, num_squares_y)
                    distances = np.empty(shape, dtype=dist.dtype)
                    neighbors = np.empty(shape, dtype=ids.dtype)
                # Keep every pixel's result instead of overwriting it.
                distances[:, num_x, num_y] = dist
                neighbors[:, num_x, num_y] = ids

        if distances is None:
            # No pixels at all: store empty results of a consistent rank.
            distances = np.empty((0, num_squares_x, num_squares_y))
            neighbors = np.empty((0, num_squares_x, num_squares_y))
        self.distances = distances
        self.neighbors = neighbors

    def map_attribute(self, attr_df, map_attr, k=None, distance_weighting=None):
        """Map attribute ``map_attr`` from ``attr_df`` onto the stack.

        Yet to do.

        Raises:
            NotImplementedError: always, until the mapping is implemented.
        """
        raise NotImplementedError(
            'RasterList.map_attribute is not implemented'
        )


def main():
    """Sketch of the full workflow.

    Builds a raster stack, extracts the environmental matrix at the
    sample points, trains a CCA feature space, imputes neighbors for the
    stack and maps one attribute.

    Returns:
        Whatever ``RasterList.map_attribute`` returns for ``'PSME'``.
    """
    # Set up your input layers
    # This is more like pseudocode, but I'm guessing you have methods
    # to do this stuff
    raster_stack = RasterList([
        Raster('C:/path/to/raster/file/1'),
        Raster('C:/path/to/raster/file/2'),
        Raster.extract_band('C:/path/to/harmonic/regression', 1),
        Raster.extract_band('C:/path/to/harmonic/regression', 2),
        # ...
    ])

    # Sample point feature class
    fc = FeatureClass('C:/path/to/feature/class')

    # Get the environmental matrix
    env_df = raster_stack.extract(fc)

    # We'll pretend we're running a CCA, so read in a species file
    # as well
    spp_df = pd.read_csv('C:/path/to/species/file')

    # Create the CCA object and train it using the two matrices
    # (train expects the species frame first, then the environmental one)
    fs_obj = CCAFeatureSpace(max_k=8, n_axes=10, spp_transform='SQRT')
    fs_obj.train(spp_df, env_df, id_field='FCID')

    # Get all neighbors and distances using the requested feature space
    # Note that we're guaranteed that the same ordering of environmental
    # data that went into the feature space creation is the same when
    # running impute
    raster_stack.impute(fs_obj)

    # We'll pretend we have a separate CSV file of attributes, although
    # they could be connected to the point feature class as well
    attr_df = pd.read_csv('C:/path/to/attributes/file')

    # Map a given variable and return a raster
    psme_raster = raster_stack.map_attribute(
        attr_df, 'PSME', k=5, distance_weighting='INVERSE'
    )
    return psme_raster
	class FeatureSpace(object):
		"""Base class for a feature space that is searched with k-NN.

		Subclasses supply ``train`` (build the space and fit
		``knn_classifier``) and ``transform`` (project raw values into the
		space).  ``__init__`` stores ``max_k`` and creates
		``KNeighborsClassifier(max_k)`` as ``knn_classifier``.
		"""

		def __init__(self, max_k):
			self.max_k = max_k
			self.knn_classifier = KNeighborsClassifier(max_k)

		def train(self, *args, **kwargs):
			"""Build the feature space; subclasses must override this.

			Arbitrary arguments are accepted so that the base signature is
			compatible with the subclass overrides, which take data frames
			and an ``id_field`` keyword.

			Raises:
				NotImplementedError: always, in the base class.
			"""
			raise NotImplementedError

		def transform(self, raw_scores):
			"""Project ``raw_scores`` into the space; subclasses must override.

			Raises:
				NotImplementedError: always, in the base class.
			"""
			raise NotImplementedError

		def k_neighbors(self, scores):
			"""Return ``knn_classifier.kneighbors(...)`` for ``scores``.

			``kneighbors`` expects one row per query.  A single 1-D score
			vector (what a per-pixel query produces) is therefore promoted
			to a one-row 2-D array; input that is already 2-D is passed
			through untouched.
			"""
			if np.ndim(scores) == 1:
				scores = np.reshape(scores, (1, -1))
			return self.knn_classifier.kneighbors(scores)

	class CCAFeatureSpace(FeatureSpace):
		"""Feature space built from a CCA of species and environmental data.

		The ordination itself is delegated to ``NumpyCCA``; this class keeps
		the resulting coefficients and site LC scores and fits the inherited
		``knn_classifier`` on them.
		"""

		def __init__(self, max_k, n_axes, spp_transform=None):
			# FeatureSpace.__init__ takes max_k only; n_axes is kept here.
			super(CCAFeatureSpace, self).__init__(max_k)
			self.n_axes = n_axes
			self.spp_transform = spp_transform

		def train(self, species_df, environmental_df, id_field='ID'):
			"""Run the CCA and fit the classifier on the plot scores.

			Args:
				species_df: data frame of species values plus ``id_field``.
				environmental_df: data frame of environmental values plus
					``id_field``.
				id_field: name of the plot ID column present in both frames.

			NOTE(review): rows of the two frames are presumably in the same
			plot order -- nothing here aligns them; confirm with the caller.
			"""
			# Here's where you run NumpyCCA
			# Set a bunch of self variables that define the space
			# Columns are looked up by indexing the frame, not via __dict__.
			self.fcids = np.array(species_df[id_field])
			spp_arr = np.array(species_df.drop(columns=[id_field]))
			env_arr = np.array(environmental_df.drop(columns=[id_field]))
			cca_obj = NumpyCCA(spp_arr, env_arr)
			self.coefficients = cca_obj.coefficients()
			self.lc_scores = cca_obj.site_lc_scores()
			# ...

			# At the end, set the plot_scores on the knn_classifier
			# NOTE(review): the site LC scores are used as the plot scores;
			# restricting/weighting them to ``n_axes`` is still to be done.
			plot_scores = self.lc_scores
			self.knn_classifier.fit(plot_scores, self.fcids)

		def transform(self, raw_scores):
			"""Project raw environmental values into the CCA space."""
			# This is function transform_env_score in current code
			return transform_env_score(raw_scores)

		def k_neighbors(self, scores):
			"""Return ``(distances, plot_ids)`` for the first query row.

			The classifier returns row indices into the training data; they
			are mapped back to plot IDs through ``self.fcids``.
			"""
			dist, ind = super(CCAFeatureSpace, self).k_neighbors(scores)
			neighbor_ids = self.fcids[ind[0]]
			return dist[0], neighbor_ids


	class EuclideanFeatureSpace(FeatureSpace):
		"""Feature space whose axes are standardized environmental variables."""

		def __init__(self, max_k):
			super(EuclideanFeatureSpace, self).__init__(max_k)

		def train(self, environmental_df, id_field='ID'):
			"""Standardize the environmental variables and fit the classifier.

			Args:
				environmental_df: data frame of environmental values plus
					``id_field``.
				id_field: name of the plot ID column.

			The per-variable mean and standard deviation are stored as
			``env_means`` / ``env_stds`` so ``transform`` applies exactly the
			same scaling to new values.
			"""
			# Columns are looked up by indexing the frame, not via __dict__.
			self.fcids = np.array(environmental_df[id_field])
			# Here's where you'd set up the axes based on normalized
			# environmental variables, like in our first example
			env_arr = np.array(
				environmental_df.drop(columns=[id_field]), dtype=float
			)
			self.env_means = env_arr.mean(axis=0)
			stds = env_arr.std(axis=0)
			# A constant variable has zero spread; divide by 1 instead so its
			# standardized score is 0 rather than NaN/inf.
			self.env_stds = np.where(stds == 0, 1.0, stds)
			plot_scores = (env_arr - self.env_means) / self.env_stds

			# At the end, set the plot_scores on the knn_classifier
			self.knn_classifier.fit(plot_scores, self.fcids)

		def transform(self, raw_scores):
			"""Standardize ``raw_scores`` with the statistics from ``train``."""
			# Standardize environmental variables
			raw_scores = np.asarray(raw_scores, dtype=float)
			return (raw_scores - self.env_means) / self.env_stds

		def k_neighbors(self, scores):
			"""Return ``(distances, plot_ids)`` for the first query row."""
			# TODO: Redundant with CCA because self.fcids is member
			# of subclasses
			dist, ind = super(EuclideanFeatureSpace, self).k_neighbors(scores)
			neighbor_ids = self.fcids[ind[0]]
			return dist[0], neighbor_ids


	# A class to represent a stack (bands) of rasters that is capable
	# of imputation and attribution
	class RasterList(object):
		"""A stack (bands) of rasters that supports imputation and attribution.

		``impute`` stores its results on the instance as ``distances`` and
		``neighbors``.
		"""

		def __init__(self, rasters):
			# Build the raster list (copied so later changes to the caller's
			# sequence do not affect the stack)
			self.raster_list = list(rasters)

		def extract(self, point_feature_class):
			"""Extract spatial signatures at plot locations.

			Intended to return a ``pandas.DataFrame`` of environmental
			signatures.  The extraction itself has not been written yet, so
			this raises explicitly rather than failing on an undefined name.

			Raises:
				NotImplementedError: always, until extraction is implemented.
			"""
			raise NotImplementedError('RasterList.extract is not implemented')

		def impute(self, feature_space_obj, pix_array=None):
			"""Find neighbors and distances for every pixel of the stack.

			This is essentially the updatePixels loop.

			Args:
				feature_space_obj: object providing ``transform(raw)`` and
					``k_neighbors(scores)`` returning ``(distances, ids)``.
				pix_array: optional array-like indexed as ``[band, x, y]``.
					When omitted it is built by stacking the rasters.

			Results are stored in ``self.distances`` and ``self.neighbors``,
			both shaped ``(k, x, y)`` where ``k`` is the number of neighbors
			returned per pixel.

			Raises:
				ValueError: if the pixel array is not 3-D.
			"""
			if pix_array is None:
				# NOTE(review): assumes every raster converts to a 2-D array
				# of identical shape via np.asarray -- confirm for the raster
				# type actually used.
				pix_array = np.stack([np.asarray(r) for r in self.raster_list])
			pix_array = np.asarray(pix_array)
			if pix_array.ndim != 3:
				raise ValueError(
					'pix_array must be 3-D (band, x, y); got %d-D'
					% pix_array.ndim
				)
			num_squares_x, num_squares_y = pix_array.shape[1:]

			# Allocated on the first pixel, once k and the dtypes are known.
			distances = None
			neighbors = None
			for num_x in range(num_squares_x):
				for num_y in range(num_squares_y):
					raw_score = pix_array[:, num_x, num_y]
					transformed_score = feature_space_obj.transform(raw_score)
					dist, ids = feature_space_obj.k_neighbors(transformed_score)
					dist = np.asarray(dist)
					ids = np.asarray(ids)
					if distances is None:
						shape = (len(dist), num_squares_x, num_squares_y)
						distances = np.empty(shape, dtype=dist.dtype)
						neighbors = np.empty(shape, dtype=ids.dtype)
					# Keep every pixel's result instead of overwriting it.
					distances[:, num_x, num_y] = dist
					neighbors[:, num_x, num_y] = ids

			if distances is None:
				# No pixels at all: store empty results of a consistent rank.
				distances = np.empty((0, num_squares_x, num_squares_y))
				neighbors = np.empty((0, num_squares_x, num_squares_y))
			self.distances = distances
			self.neighbors = neighbors

		def map_attribute(self, attr_df, map_attr, k=None, distance_weighting=None):
			"""Map attribute ``map_attr`` from ``attr_df`` onto the stack.

			Yet to do.

			Raises:
				NotImplementedError: always, until the mapping is implemented.
			"""
			raise NotImplementedError(
				'RasterList.map_attribute is not implemented'
			)


	def main():
		"""Sketch of the full workflow.

		Builds a raster stack, extracts the environmental matrix at the
		sample points, trains a CCA feature space, imputes neighbors for the
		stack and maps one attribute.

		Returns:
			Whatever ``RasterList.map_attribute`` returns for ``'PSME'``.
		"""
		# Set up your input layers
		# This is more like pseudocode, but I'm guessing you have methods
		# to do this stuff
		raster_stack = RasterList([
			Raster('C:/path/to/raster/file/1'),
			Raster('C:/path/to/raster/file/2'),
			Raster.extract_band('C:/path/to/harmonic/regression', 1),
			Raster.extract_band('C:/path/to/harmonic/regression', 2),
			# ...
		])

		# Sample point feature class
		fc = FeatureClass('C:/path/to/feature/class')

		# Get the environmental matrix
		env_df = raster_stack.extract(fc)

		# We'll pretend we're running a CCA, so read in a species file
		# as well
		spp_df = pd.read_csv('C:/path/to/species/file')

		# Create the CCA object and train it using the two matrices
		# (train expects the species frame first, then the environmental one)
		fs_obj = CCAFeatureSpace(max_k=8, n_axes=10, spp_transform='SQRT')
		fs_obj.train(spp_df, env_df, id_field='FCID')

		# Get all neighbors and distances using the requested feature space
		# Note that we're guaranteed that the same ordering of environmental
		# data that went into the feature space creation is the same when
		# running impute
		raster_stack.impute(fs_obj)

		# We'll pretend we have a separate CSV file of attributes, although
		# they could be connected to the point feature class as well
		attr_df = pd.read_csv('C:/path/to/attributes/file')

		# Map a given variable and return a raster
		psme_raster = raster_stack.map_attribute(
			attr_df, 'PSME', k=5, distance_weighting='INVERSE'
		)
		return psme_raster