This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
import codecs | |
from itertools import izip, count | |
from time import time | |
def iter_chunks(csvfile, chunk_size): | |
chunk = [] | |
for row in csvfile: | |
chunk.append(row) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from lightning.impl.primal_cd import CDClassifier | |
from lightning.impl.datasets.samples_generator import make_classification | |
# Build the binary classification data set with a fixed seed so that the
# run is reproducible.
bin_dense, bin_target = make_classification(
    n_samples=20000,
    n_features=100,
    n_informative=5,
    n_classes=2,
    random_state=0,
    flip_y=0.2,
)

# Classifier configuration: l2 penalty, "log" loss, verbose progress output.
est = CDClassifier(
    C=1.0,
    alpha=0.01,
    random_state=0,
    penalty="l2",
    loss="log",
    verbose=3,
    max_iter=100,
)

# Fit on the first 10000 rows, then score on the rows from index 10000 onward.
# NOTE: the score is computed but neither stored nor printed here.
est.fit(bin_dense[:10000, :], bin_target[:10000])
est.score(bin_dense[10000:, :], bin_target[10000:])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from sklearn.ensemble import gradient_boosting | |
import time | |
from joblib import Parallel, delayed | |
class Bad(object): | |
tree_ = None | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Benchmark sklearn's SGDClassifier on RCV1-ccat dataset. | |
To generate the input files, see http://leon.bottou.org/projects/sgd . | |
Results | |
------- | |
ACC: 0.9479 | |
AUC: 0.9476 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import itertools | |
import numpy as np | |
from sklearn.linear_model import SGDClassifier | |
from sklearn import metrics | |
def transform_pairwise(X, y): | |
"""Transforms data into pairs with balanced labels for ranking |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Benchmark script to bench scikit-learn's RandomForestClassifier | |
vs. R's randomForest. | |
It uses rpy2 to call R from python. Timings for randomForest are | |
pessimistic due to a constant overhead by wrapping numpy matrices | |
in R data_frames. The effect of the overhead can be reduced | |
by increasing the number of trees. | |
Note: make sure the LD_LIBRARY_PATH is set for rpy2:: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Parallel grid search for sklearn's GradientBoosting. | |
This script uses IPython.parallel to run cross-validated | |
grid search on an IPython cluster. Each cell on the parameter grid | |
will be evaluated ``K`` times - results are stored in MongoDB. | |
The procedure tunes the number of trees ``n_estimators`` by averaging | |
the staged scores of the GBRT model averaged over all K folds. | |
You need an IPython ipcluster to connect to - for local use simply |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{"error": 42716.2954, "samples": 506, "value": [22.532806324110698], "label": "RM <= 6.94", "type": "split", "children": [{"error": 17317.3210, "samples": 430, "value": [19.93372093023257], "label": "LSTAT <= 14.40", "type": "split", "children": [{"error": 6632.2175, "samples": 255, "value": [23.349803921568636], "label": "DIS <= 1.38", "type": "split", "children": [{"error": 390.7280, "samples": 5, "value": [45.58], "label": "CRIM <= 10.59", "type": "split", "children": [{"error": 0.0000, "samples": 4, "value": [50.0], "label": "Leaf - 4", "type": "leaf"}, {"error": 0.0000, "samples": 1, "value": [27.9], "label": "Leaf - 5", "type": "leaf"}]}, {"error": 3721.1632, "samples": 250, "value": [22.90520000000001], "label": "RM <= 6.54", "type": "split", "children": [{"error": 1636.0675, "samples": 195, "value": [21.629743589743576], "label": "LSTAT <= 7.57", "type": "split", "children": [{"error": 129.6307, "samples": 43, "value": [23.969767441860473], "label": "TAX <= 222.50", "type": "split", "children": [{"err |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from numpy import genfromtxt | |
from sklearn.ensemble import GradientBoostingClassifier | |
def main(): | |
dataset = genfromtxt(open('train_subset.csv','r'), delimiter=',', dtype='float64') | |
clf = GradientBoostingClassifier(n_estimators=100, learn_rate=1.0, max_depth=1, random_state=0) | |
X = dataset[:,1:] | |
y = dataset[:,0] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from sklearn import datasets | |
from sklearn.ensemble import gradient_boosting | |
from sklearn.ensemble import RandomForestClassifier,RandomForestRegressor | |
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor | |
X, y = datasets.make_hastie_10_2(n_samples=12000, random_state=1) | |
X = X.astype(np.float32) | |
NewerOlder