Skip to content

Instantly share code, notes, and snippets.

@benman1
Last active June 10, 2018 19:21
Show Gist options
  • Save benman1/2078dc90bccd1b25dfed3f0a6868e682 to your computer and use it in GitHub Desktop.
Save benman1/2078dc90bccd1b25dfed3f0a6868e682 to your computer and use it in GitHub Desktop.
binning as preprocessing
import sys
import pandas as pd
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.datasets import load_iris
def extract_bins(tree):
    """Collect per-feature bin edges from the split thresholds of a tree.

    Args:
        tree: Object exposing parallel ``feature`` and ``threshold``
            sequences with one entry per node, e.g. the ``tree_``
            attribute of a fitted scikit-learn decision tree.

    Returns:
        dict mapping each feature index that appears in a split to an
        ascending list of edges: ``-sys.float_info.max``, the sorted
        split thresholds for that feature, then ``sys.float_info.max``.
        Features that are never split on are absent from the result.
    """
    edges = {}
    for feature, threshold in zip(tree.feature, tree.threshold):
        if feature == -2:  # undefined: this node does not split on a feature
            continue
        edges.setdefault(feature, []).append(threshold)

    bins = {}
    for feature, thresholds in edges.items():
        print('feature: {}, edges: {}'.format(feature, thresholds))
        # The lower sentinel must be the most negative finite float.
        # sys.float_info.min is the smallest *positive* normalized float,
        # so using it would exclude zero and negative values from every bin
        # and break the ordering whenever a threshold is <= 0.
        bins[feature] = (
            [-sys.float_info.max] + sorted(thresholds) + [sys.float_info.max]
        )
    return bins
# Demo: derive bin edges from one tree of a small random forest on iris.
data = load_iris()

# Shallow forest (depth 2) with a fixed seed so the splits are reproducible.
clf = RandomForestClassifier(max_depth=2, random_state=0).fit(
    data.data, data.target
)

# TODO: replace with loop here
# Only the first estimator of the forest is inspected for now.
dt = clf.estimators_[0]
tree = dt.tree_
bins = extract_bins(tree)
# TODO: use bins for model; implement as preprocessing step
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment