Skip to content

Instantly share code, notes, and snippets.

@pprett
pprett / joblib_killer.py
Created Jun 20, 2014
joblib hangs if a job segfaults
View joblib_killer.py
import numpy as np
from sklearn.ensemble import gradient_boosting
import time
from joblib import Parallel, delayed
class Bad(object):
tree_ = None
@pprett
pprett / lightning_convergence.py
Created Oct 6, 2014
Lightning CDClassifier does not converge
View lightning_convergence.py
from lightning.impl.primal_cd import CDClassifier
from lightning.impl.datasets.samples_generator import make_classification
# Generate a two-class dataset: 20000 samples, 100 features, 5 of them
# informative, with flip_y=0.2 and a fixed seed.
bin_dense, bin_target = make_classification(
    n_samples=20000,
    n_features=100,
    n_informative=5,
    n_classes=2,
    flip_y=0.2,
    random_state=0,
)

# Configure the classifier with an L2 penalty and log loss, at most
# 100 iterations, and verbose output enabled.
est = CDClassifier(
    loss="log",
    penalty="l2",
    C=1.0,
    alpha=0.01,
    max_iter=100,
    random_state=0,
    verbose=3,
)

# Fit on the first 10000 rows, then score on the remaining rows.
# The score is not stored or printed here.
est.fit(bin_dense[:10000, :], bin_target[:10000])
est.score(bin_dense[10000:, :], bin_target[10000:])
@pprett
pprett / tksvm.py
Created Nov 12, 2010
A simple graphical frontend for scikit.learn Libsvm bindings.
View tksvm.py
"""
==========
Libsvm GUI
==========
A simple graphical frontend for Libsvm mainly intended for didactic
purposes. You can create data points by point and click and visualize
the decision region induced by different kernels and parameter settings.
To create positive examples click the left mouse button; to create
@pprett
pprett / mk_product_cat_dataset.py
Created Mar 24, 2011
Cross-lingual product category dataset creation script.
View mk_product_cat_dataset.py
#!/usr/bin/python
"""Creates the product category dataset from the Cross-Lingual
Sentiment dataset [1]. The output can be used directly with the
CLSCL reference implementation in NUT [2].
Usage:
./mk_product_cat_dataset.py {en|de|fr|jp} {train|test|unlabeled} output_dir num_docs
@pprett
pprett / joblib_test.py
Created Apr 6, 2011
Joblib.Parallel explicit argument parsing
View joblib_test.py
from __future__ import division
import gc
import numpy as np
from time import sleep
from ext.joblib import Parallel, delayed
from multiprocessing import Process, current_process
from scikits.learn import svm, linear_model
@pprett
pprett / linearsvc_vs_svc.py
Created May 24, 2011
Large difference in classifier accuracy between LinearSVC and SVC (v2)
View linearsvc_vs_svc.py
"""High difference in classifier accuracies with LinearSVC and SVC.
Get data.npz from [1].
[1] https://docs.google.com/leaf?id=0B1BhwRZOwyxRZTcxZDA1OWMtZjZkMy00YjgxLWI3ZTMtZjJkNGIyODAyOTQy&hl=en_US
"""
print __doc__
import numpy as np
from functools import partial
@pprett
pprett / njobsbug.py
Created May 24, 2011
LinearSVC bug with n_jobs
View njobsbug.py
#!/usr/bin/python
import sys
import numpy as np
from pprint import pprint
from scikits.learn.cross_val import StratifiedKFold
from scikits.learn.grid_search import GridSearchCV
from scikits.learn import svm
from scikits.learn.metrics import zero_one_score, f1_score, classification_report
@pprett
pprett / benchmark_gbm.py
Created Nov 4, 2011
Benchmark R's gbm module via rpy2
View benchmark_gbm.py
"""
Benchmark script to bench R's gbm package via rpy2.
NOTE::
make sure you run
$ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib64/R/lib
"""
@pprett
pprett / bench_yahoo_ltrc.py
Created Mar 13, 2012
Sklearn Yahoo LTRC 2010 Benchmark script
View bench_yahoo_ltrc.py
import numpy as np
import svmlight_loader
from sklearn.ensemble import GradientBoostingRegressor
from time import time
ROOT_DIR = '/home/pprett/corpora/yahoo-ltrc-2010/data'
X_train, y_train = svmlight_loader.load_svmlight_file(ROOT_DIR + '/set1.train.txt',
n_features=700,
@pprett
pprett / bench_tree.py
Created Jul 11, 2012
Simple and stupid benchmark for sklearn DecisionTreeRegressor
View bench_tree.py
import numpy as np
from sklearn import datasets
from sklearn.ensemble import gradient_boosting
from sklearn.ensemble import RandomForestClassifier,RandomForestRegressor
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
# Build the benchmark data with make_hastie_10_2: 12000 samples, fixed seed
# (random_state=1).
X, y = datasets.make_hastie_10_2(n_samples=12000, random_state=1)
# Cast the feature matrix to 32-bit floats.
# NOTE(review): presumably the dtype the tree code expects -- confirm.
X = X.astype(np.float32)
You can’t perform that action at this time.