Created
July 11, 2022 18:27
-
-
Save knu2xs/6c4b1ecd477ae50c6391826c45481d81 to your computer and use it in GitHub Desktop.
Integration of the ArcGIS API for Python into scikit-learn Transformers
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from functools import lru_cache | |
from typing import Union, List, Optional | |
from arcgis.geoenrichment import Country | |
from arcgis.geometry import Polygon | |
import numpy as np | |
import pandas as pd | |
from sklearn.base import BaseEstimator, TransformerMixin | |
# Names exported by ``from <module> import *``.
# NOTE(review): only ``EnrichBase`` and ``EnrichPolygon`` are defined in the portion of the file visible
# here; confirm the remaining names are defined further down before relying on a star-import.
__all__ = ['EnrichBase', 'EnrichPolygon', 'EnrichStandardGeography', 'KeepOnlyEnrichColumns', 'ArrayToDataFrame']
class EnrichBase(BaseEstimator, TransformerMixin):
    """
    Base class for scikit-learn transformers built on ``arcgis.geoenrichment.Country.enrich``.

    The class stores and validates the three settings shared by enrichment
    transformers (the country, the enrich variables and whether geometry is
    returned) and provides a no-op ``fit``. It does not implement
    ``transform``; subclasses performing a specific kind of enrichment are
    expected to supply it.
    """

    # class-level defaults so the properties return None before a value is assigned
    _country = None
    _enrich_variables = None
    _return_geometry = None

    def __init__(self, country: Country, enrich_variables: Union[List[str], pd.DataFrame],
                 return_geometry: bool = True) -> None:
        """
        Args:
            country: Country to be used for enrichment.
            enrich_variables: A list of enrich variable names or filtered dataframe of enrich variables to be used.
            return_geometry: Do you want the shapes or not?

        Raises:
            TypeError: If ``country`` is not a ``Country`` or ``return_geometry`` is not a ``bool``.
            ValueError: If ``enrich_variables`` is neither a list nor a data frame.
        """
        # apply the parent init methods to get standard methods such as get and set params for free
        super().__init__()

        # country has to be assigned first; the enrich_variables setter reads it to resolve a list of names
        self.country = country
        self.enrich_variables = enrich_variables
        self.return_geometry = return_geometry

    @property
    def country(self):
        """``arcgis.geoenrichment.Country`` object instance being used."""
        return self._country

    @country.setter
    def country(self, country: Country):
        # explicit raise rather than assert, so the check survives ``python -O``
        if not isinstance(country, Country):
            raise TypeError('country must be an arcgis.geoenrichment.Country instance, not '
                            f'{type(country).__name__}')
        self._country = country

    @property
    def enrich_variables(self):
        """Pandas data frame of variables being used for enrichment."""
        return self._enrich_variables

    @enrich_variables.setter
    def enrich_variables(self, enrich_variables: Union[List[str], pd.DataFrame]):
        # use a trick, reaching into the Country to get a matching method, to get a dataframe if only a list
        if isinstance(enrich_variables, list):
            self._enrich_variables = self.country._ba_cntry.get_enrich_variables_from_iterable(enrich_variables)
        elif isinstance(enrich_variables, pd.DataFrame):
            self._enrich_variables = enrich_variables
        else:
            raise ValueError('enrich_variables must be either a list of enrich variable names or a enrich variable '
                             'data frame')

    @property
    def return_geometry(self):
        """Do you want the geometry when enriching?"""
        return self._return_geometry

    @return_geometry.setter
    def return_geometry(self, return_geometry: bool):
        # explicit raise rather than assert, so the check survives ``python -O``
        if not isinstance(return_geometry, bool):
            raise TypeError(f'return_geometry must be a bool, not {type(return_geometry).__name__}')
        self._return_geometry = return_geometry

    @property
    def enrich_var_aliases(self):
        """List of enrich aliases, so you can understand what the variables are."""
        # computed on every access: a cache keyed on ``self`` would keep instances alive and would keep
        # returning the old aliases after ``enrich_variables`` is reassigned
        return list(self.enrich_variables['alias'])

    def fit(self, X, y=None):
        """
        Do nothing and return the transformer itself, since this is only a preprocessor.

        Args:
            X: Input data; ignored.
            y: Optional target values; ignored. Accepted so callers that pass a target do not fail.

        Returns:
            This instance.
        """
        return self
class EnrichPolygon(EnrichBase):
    """
    The ``arcgis.geoenrichment.Country.enrich`` wrapped in a preprocessor
    for enriching input areas delineated with ``arcgis.geometry.Polygon``
    geometries. Inherits from ``EnrichBase``.
    """

    def __init__(self, country: Country, enrich_variables: Union[List[str], pd.DataFrame],
                 return_geometry: bool = True) -> None:
        """
        Args:
            country: Country to be used for enrichment.
            enrich_variables: A list of enrich variable names or filtered dataframe of enrich variables to be used.
            return_geometry: Do you want the shapes or not?
        """
        super().__init__(country, enrich_variables, return_geometry)

    @staticmethod
    def _has_valid_polygons(X) -> bool:
        """Return True when ``X`` is a data frame, list or array whose first geometry type is polygon."""
        if isinstance(X, pd.DataFrame):
            # needs a geometry column set on the ``spatial`` accessor, and that accessor must validate
            if X.spatial.name is None or not X.spatial.validate():
                return False
            geom_types = X.spatial.geometry_type
            return len(geom_types) > 0 and geom_types[0].lower() == 'polygon'

        if isinstance(X, (list, np.ndarray)):
            # empty input has nothing to inspect; report invalid instead of failing on X[0]
            if len(X) == 0:
                return False
            # only the first element is inspected; an element without a string ``geometry_type`` is invalid
            geom_type = getattr(X[0], 'geometry_type', None)
            return isinstance(geom_type, str) and geom_type.lower() == 'polygon'

        # any other container type is not supported
        return False

    def transform(self, X: Union[pd.DataFrame, List[Polygon], np.ndarray]) -> pd.DataFrame:
        """
        Retrieve Pandas Data Frame of enriched data.

        Args:
            X: List of Polygon geometries or Spatially Enabled DataFrame of areas
                to be enriched.

        Returns:
            Enriched data.

        Raises:
            ValueError: If ``X`` is empty, is not a data frame, list or array, or its
                first geometry is not a polygon.
        """
        # since just for polygons, make sure this is what we are working with
        if not self._has_valid_polygons(X):
            raise ValueError('It does not appear the inputs are valid Polygons. Please ensure the inputs are valid '
                             'Polygons.')

        # invoke enrich and put in variable to make debugging easier
        ret_df = self.country.enrich(X, enrich_variables=self.enrich_variables, return_geometry=self.return_geometry)
        return ret_df
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment