Skip to content

Instantly share code, notes, and snippets.

@pprett
pprett / tksvm.py
Created Nov 12, 2010
A simple graphical frontend for scikit.learn Libsvm bindings.
View tksvm.py
"""
==========
Libsvm GUI
==========
A simple graphical frontend for Libsvm mainly intended for didactic
purposes. You can create data points by pointing and clicking, and visualize
the decision region induced by different kernels and parameter settings.
To create positive examples click the left mouse button; to create
@pprett
pprett / mk_product_cat_dataset.py
Created Mar 24, 2011
Cross-lingual product category dataset creation script.
View mk_product_cat_dataset.py
#!/usr/bin/python
"""Creates the product category dataset from the Cross-Lingual
Sentiment dataset [1]. The output can be used directly with the
CLSCL reference implementation in NUT [2].
Usage:
./mk_product_cat_dataset.py {en|de|fr|jp} {train|test|unlabeled} output_dir num_docs
@pprett
pprett / checkerboards.py
Created Mar 28, 2011
checkerboards covariate shift example
View checkerboards.py
#!/usr/bin/python
"""
Run python checkerboards.py
Example from:
M. Hein (2009). Binary Classification under Sample Selection Bias. In Dataset Shift in Machine Learning, chap. 3, pp. 41-64. The MIT Press.
"""
from __future__ import division
import matplotlib
@pprett
pprett / joblib_test.py
Created Apr 6, 2011
Joblib.Parallel explicit argument parsing
View joblib_test.py
from __future__ import division
import gc
import numpy as np
from time import sleep
from ext.joblib import Parallel, delayed
from multiprocessing import Process, current_process
from scikits.learn import svm, linear_model
@pprett
pprett / njobsbug.py
Created May 24, 2011
LinearSVC bug with n_jobs
View njobsbug.py
#!/usr/bin/python
import sys
import numpy as np
from pprint import pprint
from scikits.learn.cross_val import StratifiedKFold
from scikits.learn.grid_search import GridSearchCV
from scikits.learn import svm
from scikits.learn.metrics import zero_one_score, f1_score, classification_report
@pprett
pprett / linearsvc_vs_svc.py
Created May 24, 2011
High difference in classifier accuracies with LinearSVC and SVC v2
View linearsvc_vs_svc.py
"""High difference in classifier accuracies with LinearSVC and SVC.
Get data.npz from [1].
[1] https://docs.google.com/leaf?id=0B1BhwRZOwyxRZTcxZDA1OWMtZjZkMy00YjgxLWI3ZTMtZjJkNGIyODAyOTQy&hl=en_US
"""
print __doc__
import numpy as np
from functools import partial
@pprett
pprett / benchmark_gbm.py
Created Nov 4, 2011
Benchmark R's gbm module via rpy2
View benchmark_gbm.py
"""
Benchmark script to bench R's gbm package via rpy2.
NOTE::
make sure you run
$ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib64/R/lib
"""
@pprett
pprett / bench_yahoo_ltrc.py
Created Mar 13, 2012
Sklearn Yahoo LTRC 2010 Benchmark script
View bench_yahoo_ltrc.py
import numpy as np
import svmlight_loader
from sklearn.ensemble import GradientBoostingRegressor
from time import time
ROOT_DIR = '/home/pprett/corpora/yahoo-ltrc-2010/data'
X_train, y_train = svmlight_loader.load_svmlight_file(ROOT_DIR + '/set1.train.txt',
n_features=700,
@pprett
pprett / bench_tree.py
Created Jul 11, 2012
Simple and stupid benchmark for sklearn DecisionTreeRegressor
View bench_tree.py
import numpy as np
from sklearn import datasets
from sklearn.ensemble import gradient_boosting
from sklearn.ensemble import RandomForestClassifier,RandomForestRegressor
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
X, y = datasets.make_hastie_10_2(n_samples=12000, random_state=1)
X = X.astype(np.float32)
@pprett
pprett / test_subset.py
Created Aug 7, 2012
Exhibits error in GradientBoostingClassifier
View test_subset.py
from numpy import genfromtxt
from sklearn.ensemble import GradientBoostingClassifier
def main():
dataset = genfromtxt(open('train_subset.csv','r'), delimiter=',', dtype='float64')
clf = GradientBoostingClassifier(n_estimators=100, learn_rate=1.0, max_depth=1, random_state=0)
X = dataset[:,1:]
y = dataset[:,0]
You can’t perform that action at this time.