Skip to content

Instantly share code, notes, and snippets.

@knu2xs
Created July 11, 2022 18:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save knu2xs/6c4b1ecd477ae50c6391826c45481d81 to your computer and use it in GitHub Desktop.
Save knu2xs/6c4b1ecd477ae50c6391826c45481d81 to your computer and use it in GitHub Desktop.
Integration of the ArcGIS API for Python in scikit-learn Transformers
from functools import lru_cache
from typing import Union, List, Optional
from arcgis.geoenrichment import Country
from arcgis.geometry import Polygon
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin
# Names exported by ``from <module> import *``.
# NOTE(review): only ``EnrichBase`` and ``EnrichPolygon`` are defined in the portion of the
# file visible here; confirm the remaining names are defined elsewhere in the module, since
# a star-import raises ``AttributeError`` for any listed name that does not exist.
__all__ = ['EnrichBase', 'EnrichPolygon', 'EnrichStandardGeography', 'KeepOnlyEnrichColumns', 'ArrayToDataFrame']
class EnrichBase(BaseEstimator, TransformerMixin):
    """
    Base scikit-learn transformer wrapping ``arcgis.geoenrichment.Country.enrich``.

    The ``Country.enrich`` method provides access to a massive amount of data
    for analysis. This object streamlines the process of accessing this method
    as part of a scikit-learn Pipeline by wrapping the functionality into a
    Transformer, specifically a preprocessor, and is used to create other
    transformers performing more specific tasks. It holds the configuration
    shared by those transformers (country, enrich variables, and whether to
    return geometry); subclasses are expected to supply ``transform``.
    """

    # class-level defaults so the property getters work before the setters have run
    _country = None
    _enrich_variables = None
    _return_geometry = None

    def __init__(self, country: Country, enrich_variables: Union[List[str], pd.DataFrame],
                 return_geometry: bool = True) -> None:
        """
        Args:
            country: Country to be used for enrichment.
            enrich_variables: A list of enrich variable names or filtered dataframe of enrich variables to be used.
            return_geometry: Do you want the shapes or not?

        Raises:
            TypeError: If ``country`` is not a ``Country`` or ``return_geometry`` is not a ``bool``.
            ValueError: If ``enrich_variables`` is neither a list nor a data frame.
        """
        # apply the parent init methods to get standard methods such as get and set params for free
        super().__init__()

        # country has to be assigned first; the enrich_variables setter reads it to resolve variable names
        self.country = country
        self.enrich_variables = enrich_variables
        self.return_geometry = return_geometry

    @property
    def country(self) -> Optional[Country]:
        """``arcgis.geoenrichment.Country`` object instance being used."""
        return self._country

    @country.setter
    def country(self, country: Country) -> None:
        # explicit raise instead of assert so the check is not stripped when running with -O
        if not isinstance(country, Country):
            raise TypeError(f'country must be an arcgis.geoenrichment.Country instance, not '
                            f'{type(country).__name__}')
        self._country = country

    @property
    def enrich_variables(self) -> Optional[pd.DataFrame]:
        """Pandas data frame of variables being used for enrichment."""
        return self._enrich_variables

    @enrich_variables.setter
    def enrich_variables(self, enrich_variables: Union[List[str], pd.DataFrame]) -> None:
        # use a trick, reaching into the Country to get a matching method, to get a dataframe if only a list
        if isinstance(enrich_variables, list):
            self._enrich_variables = self.country._ba_cntry.get_enrich_variables_from_iterable(enrich_variables)
        elif isinstance(enrich_variables, pd.DataFrame):
            self._enrich_variables = enrich_variables
        else:
            raise ValueError('enrich_variables must be either a list of enrich variable names or a enrich variable '
                             'data frame')

    @property
    def return_geometry(self) -> Optional[bool]:
        """Do you want the geometry when enriching?"""
        return self._return_geometry

    @return_geometry.setter
    def return_geometry(self, return_geometry: bool) -> None:
        # explicit raise instead of assert so the check is not stripped when running with -O
        if not isinstance(return_geometry, bool):
            raise TypeError(f'return_geometry must be a bool, not {type(return_geometry).__name__}')
        self._return_geometry = return_geometry

    @property
    def enrich_var_aliases(self) -> List[str]:
        """List of enrich aliases, so you can understand what the variables are."""
        # deliberately not cached: a cache keyed on ``self`` would keep returning the old aliases
        # after ``enrich_variables`` is reassigned, and would keep the instance alive
        return list(self.enrich_variables['alias'])

    def fit(self, X, y=None) -> 'EnrichBase':
        """
        Since just building a preprocessor nothing is happening here.

        Args:
            X: Input data; ignored.
            y: Optional target; ignored. Accepted so the transformer can be fitted
                with a target, following the scikit-learn ``fit(X, y=None)`` convention.

        Returns:
            This transformer instance, unchanged.
        """
        return self
class EnrichPolygon(EnrichBase):
    """
    The ``arcgis.geoenrichment.Country.enrich`` method wrapped in a preprocessor
    for enriching input areas delineated with ``arcgis.geometry.Polygon``
    geometries. Inherits from ``EnrichBase``.
    """

    def __init__(self, country: Country, enrich_variables: Union[List[str], pd.DataFrame],
                 return_geometry: bool = True) -> None:
        """
        Args:
            country: Country to be used for enrichment.
            enrich_variables: A list of enrich variable names or filtered dataframe of enrich variables to be used.
            return_geometry: Do you want the shapes or not?
        """
        super().__init__(country, enrich_variables, return_geometry)

    @staticmethod
    def _has_valid_polygons(X) -> bool:
        """Return True when ``X`` is a supported container whose first geometry is a polygon."""
        if isinstance(X, pd.DataFrame):
            spatial = X.spatial

            # the frame needs a named geometry column passing the accessor's own validation
            if spatial.name is None or not spatial.validate():
                return False

            geom_types = spatial.geometry_type
            return len(geom_types) > 0 and geom_types[0].lower() == 'polygon'

        if isinstance(X, (list, np.ndarray)):
            # empty input has nothing to inspect; report invalid instead of failing on X[0]
            if len(X) == 0:
                return False

            # only the first geometry is inspected; objects lacking ``geometry_type`` are not polygons
            geom_type = getattr(X[0], 'geometry_type', None)
            return isinstance(geom_type, str) and geom_type.lower() == 'polygon'

        # any other container type is unsupported
        return False

    def transform(self, X: Union[pd.DataFrame, List[Polygon], np.ndarray]) -> pd.DataFrame:
        """
        Retrieve Pandas Data Frame of enriched data.

        Args:
            X: List of Polygon geometries or Spatially Enabled DataFrame of areas
                to be enriched.

        Returns:
            Enriched data.

        Raises:
            ValueError: If ``X`` is empty, is not a data frame, list or array, or
                its first geometry is not a polygon.
        """
        # since just for polygons, make sure this is what we are working with
        if not self._has_valid_polygons(X):
            raise ValueError('It does not appear the inputs are valid Polygons. Please ensure the inputs are valid '
                             'Polygons.')

        # invoke enrich and put in variable to make debugging easier
        ret_df = self.country.enrich(X, enrich_variables=self.enrich_variables, return_geometry=self.return_geometry)

        return ret_df
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment