Peter Prettenhofer pprett

## tksvm.py
"""
==========
Libsvm GUI
==========

A simple graphical frontend for Libsvm mainly intended for didactic
purposes. You can create data points by point and click and visualize
the decision region induced by different kernels and parameter settings.

To create positive examples click the left mouse button; to create

## mk_product_cat_dataset.py
#!/usr/bin/python

"""Creates the product category dataset from the Cross-Lingual
Sentiment dataset [1]. The output can be used directly with the
CLSCL reference implementation in NUT [2].

Usage:

  ./mk_product_cat_dataset.py {en|de|fr|jp} {train|test|unlabeled} output_dir num_docs

## checkerboards.py
#!/usr/bin/python
"""
Run python checkerboards.py

Example from:
M. Hein (2009). Binary Classification under Sample Selection Bias, In Dataset Shift in Machine Learning, chap. 3, pp. 41-64. The MIT Press.
"""

from __future__ import division
import matplotlib

## joblib_test.py
from __future__ import division

import gc
import numpy as np
from time import sleep
from ext.joblib import Parallel, delayed
from multiprocessing import Process, current_process
from scikits.learn import svm, linear_model


## njobsbug.py
#!/usr/bin/python

import sys
import numpy as np
from pprint import pprint

from scikits.learn.cross_val import StratifiedKFold
from scikits.learn.grid_search import GridSearchCV
from scikits.learn import svm
from scikits.learn.metrics import zero_one_score, f1_score, classification_report

## linearsvc_vs_svc.py
"""High difference in classifier accuracies with LinearSVC and SVC.

Get data.npz from [1].

[1] https://docs.google.com/leaf?id=0B1BhwRZOwyxRZTcxZDA1OWMtZjZkMy00YjgxLWI3ZTMtZjJkNGIyODAyOTQy&hl=en_US
"""
print __doc__

import numpy as np
from functools import partial

## benchmark_gbm.py
"""
Benchmark script to bench R's gbm package via rpy2.

NOTE::

make sure you run
$ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib64/R/lib

"""

## bench_yahoo_ltrc.py
import numpy as np
import svmlight_loader

from sklearn.ensemble import GradientBoostingRegressor

from time import time

ROOT_DIR = '/home/pprett/corpora/yahoo-ltrc-2010/data'
X_train, y_train = svmlight_loader.load_svmlight_file(ROOT_DIR + '/set1.train.txt',
                                                      n_features=700,

## bench_tree.py
import numpy as np
from sklearn import datasets
from sklearn.ensemble import gradient_boosting
from sklearn.ensemble import RandomForestClassifier,RandomForestRegressor
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor

X, y = datasets.make_hastie_10_2(n_samples=12000, random_state=1)
X = X.astype(np.float32)


## test_subset.py
from numpy import genfromtxt
from sklearn.ensemble import GradientBoostingClassifier

def main():
	dataset = genfromtxt(open('train_subset.csv','r'), delimiter=',', dtype='float64')
	clf = GradientBoostingClassifier(n_estimators=100, learn_rate=1.0, max_depth=1, random_state=0)

	X = dataset[:,1:]
	y = dataset[:,0]
	"""
	==========
	Libsvm GUI
	==========

	A simple graphical frontend for Libsvm mainly intended for didactic
	purposes. You can create data points by point and click and visualize
	the decision region induced by different kernels and parameter settings.

	To create positive examples click the left mouse button; to create
	#!/usr/bin/python

	"""Creates the product category dataset from the Cross-Lingual
	Sentiment dataset [1]. The output can be used directly with the
	CLSCL reference implementation in NUT [2].

	Usage:

	./mk_product_cat_dataset.py {en|de|fr|jp} {train|test|unlabeled} output_dir num_docs
	#!/usr/bin/python
	"""
	Run python checkerboards.py

	Example from:
	M. Hein (2009). Binary Classification under Sample Selection Bias, In Dataset Shift in Machine Learning, chap. 3, pp. 41-64. The MIT Press.
	"""

	from __future__ import division
	import matplotlib
	from __future__ import division

	import gc
	import numpy as np
	from time import sleep
	from ext.joblib import Parallel, delayed
	from multiprocessing import Process, current_process
	from scikits.learn import svm, linear_model
	#!/usr/bin/python

	import sys
	import numpy as np
	from pprint import pprint

	from scikits.learn.cross_val import StratifiedKFold
	from scikits.learn.grid_search import GridSearchCV
	from scikits.learn import svm
	from scikits.learn.metrics import zero_one_score, f1_score, classification_report
	"""High difference in classifier accuracies with LinearSVC and SVC.

	Get data.npz from [1].

	[1] https://docs.google.com/leaf?id=0B1BhwRZOwyxRZTcxZDA1OWMtZjZkMy00YjgxLWI3ZTMtZjJkNGIyODAyOTQy&hl=en_US
	"""
	print __doc__

	import numpy as np
	from functools import partial
	"""
	Benchmark script to bench R's gbm package via rpy2.

	NOTE::

	make sure you run
	$ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib64/R/lib

	"""
	import numpy as np
	import svmlight_loader

	from sklearn.ensemble import GradientBoostingRegressor

	from time import time

	ROOT_DIR = '/home/pprett/corpora/yahoo-ltrc-2010/data'
	X_train, y_train = svmlight_loader.load_svmlight_file(ROOT_DIR + '/set1.train.txt',
	n_features=700,
	import numpy as np
	from sklearn import datasets
	from sklearn.ensemble import gradient_boosting
	from sklearn.ensemble import RandomForestClassifier,RandomForestRegressor
	from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor

	X, y = datasets.make_hastie_10_2(n_samples=12000, random_state=1)
	X = X.astype(np.float32)
	from numpy import genfromtxt
	from sklearn.ensemble import GradientBoostingClassifier

	def main():
	dataset = genfromtxt(open('train_subset.csv','r'), delimiter=',', dtype='float64')
	clf = GradientBoostingClassifier(n_estimators=100, learn_rate=1.0, max_depth=1, random_state=0)

	X = dataset[:,1:]
	y = dataset[:,0]