Skip to content

Instantly share code, notes, and snippets.

@acocos
acocos / 1_timestamped-property-enrichment-demo.ipynb
Last active June 30, 2022 14:38
iggy-property-enrichment-demo
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@acocos
acocos / iggy_perdistrict_flow.py
Last active January 24, 2022 23:51
[iggy-metaflow-demo] per-district flow
from iggy_metaflow_base import IggyFlow, LoadedDataset
from metaflow import FlowSpec, step, catch
class IggyPerDistrictFlow(FlowSpec, IggyFlow):
@step
def start(self):
# load data (same as `iggy_baseline_flow`)
@step
@acocos
acocos / iggy_metaflow_base.py
Created January 24, 2022 17:50
[iggy-metaflow-demo] base enrich fn
from metaflow import JSONType, Parameter
import json
# Default package configuration: a version id plus two prefixes.
# Both prefixes currently hold the same value ("fl_pinellas_quadkeys").
# NOTE(review): how these keys are consumed is not visible here — confirm
# against the code that reads this mapping before renaming anything.
DEFAULT_IGGY_PKG_CONFIG = dict(
    iggy_version_id="20211110214810",
    crosswalk_prefix="fl_pinellas_quadkeys",
    iggy_prefix="fl_pinellas_quadkeys",
)
IGGY_FEATURES = [
@acocos
acocos / iggy_enrich_flow.py
Last active January 24, 2022 23:52
[iggy-metaflow-demo] iggy enrich fn
from iggy_metaflow_base import IggyFlow
from metaflow import FlowSpec, step, Parameter
class IggyEnrichFlow(FlowSpec, IggyFlow):
@step
def start(self):
# Load Data (same as `iggy_baseline_flow`)
...
@acocos
acocos / iggy_baseline_flow.py
Last active January 24, 2022 23:53
[iggy-metaflow-demo] IggyFlow and IggyBaselineFlow classes
from iggy_metaflow_base import IggyFlow # implements basic functions for loading data, model training and evaluation
from metaflow import FlowSpec, step
class IggyBaselineFlow(FlowSpec, IggyFlow):
@step
def start(self):
# Load Data
self.file_prefix = "baseline"
data, scaled_features = self.load_data(drop_cols=True)
# convert the enriched data to a Featuretools entity set, and describe the columns
# Step 1: create an entity set with id 'sales-data-iggy'.
# Step 2: register `df_iggy` in it as the 'sales' entity with index='strap';
# the value returned by `entity_from_dataframe` is rebound to `es_iggy`.
# NOTE(review): `entity_from_dataframe` looks like the pre-1.0 Featuretools
# API — confirm against the installed Featuretools version.
es_iggy = ft.EntitySet(id='sales-data-iggy')
es_iggy = es_iggy.entity_from_dataframe(
entity_id='sales',
dataframe=df_iggy,
index='strap'
)
iggy_datadict = pd.DataFrame(
[(v.name, v.type_string, df_iggy.iloc[8][v.name])
for v in es_iggy['sales'].variables],
# finally, combine everything in an IggyFeatureSet
# The four feature collections are joined with `+` in the order
# distance, count, value, amenity-score and passed as a single argument.
# NOTE(review): presumably all four are plain lists; `distance_features`
# is defined outside this excerpt — confirm.
fs = iggyfeature.IggyFeatureSet(
distance_features + count_features + value_features + amenity_score_features)
# and enrich your entire data frame at once
# `enrich_dataframe` receives `df` and the names of its latitude and
# longitude columns; its result is bound to `df_iggy`.
df_iggy = fs.enrich_dataframe(df, latitude_col='latitude', longitude_col='longitude')
# add features for amenity score
# Two IggyAmenitiesScoreFeature objects built on the same `iggy` object:
# one with within_minutes_driving=10, one with within_minutes_walking=10.
amenity_score_features = [
iggyfeature.IggyAmenitiesScoreFeature(iggy, within_minutes_driving=10),
iggyfeature.IggyAmenitiesScoreFeature(iggy, within_minutes_walking=10)
]
# add features for physical characteristics of location
value_features = [
iggyfeature.IggyLookupFeature(
iggy, label='light-pollution', calc_method='value'),
iggyfeature.IggyLookupFeature(
iggy, label='tree-canopy', calc_method='value'),
iggyfeature.IggyLookupFeature(
iggy, label='air_quality', calc_method='value'),
iggyfeature.IggyLookupFeature(
iggy, label='population_density_per_km', calc_method='value')
# add feature encoding count of specific POI types within nearby radius
count_features = [
iggyfeature.IggyPOIFeature(
iggy, label='restaurants', within_minutes_walking=15, calc_method='count'),
iggyfeature.IggyPOIFeature(
iggy, label='bars', within_minutes_walking=15, calc_method='count'),
iggyfeature.IggyPOIFeature(
iggy, label='coffee_shops', within_minutes_walking=15, calc_method='count'),
iggyfeature.IggyPOIFeature(
iggy, label='bakeries', within_minutes_walking=15, calc_method='count'),