Joblib.Parallel explicit argument passing
from __future__ import division | |
import gc | |
import numpy as np | |
from time import sleep | |
from ext.joblib import Parallel, delayed | |
from multiprocessing import Process, current_process | |
from scikits.learn import svm, linear_model | |
def _score(i, X_train, y_train, X_test, y_test):
    """Fit a linear SVM on the training split and return its test accuracy.

    ``i`` is the task index handed in by the caller; it is not used in the
    computation. The return value is the fraction of ``X_test`` rows whose
    predicted label equals the corresponding entry of ``y_test``.
    """
    # Alternative estimator that was tried in place of LinearSVC:
    #   linear_model.SGDClassifier(alpha=0.001, n_iter=10)
    estimator = svm.LinearSVC(C=10, tol=1e-3, dual=False)
    # Use whatever fit() returns for prediction, as the chained call did.
    fitted = estimator.fit(X_train, y_train)
    predictions = fitted.predict(X_test)
    return (y_test == predictions).mean()
if __name__ == '__main__':
    import gzip

    # Gzipped, comma-separated numeric table; reshaped below to 55 columns
    # per row, the last of which is used as the label.
    data_path = '/home/pprett/workspace/scikit-learn/covtype.data.gz'

    # try/finally releases the file handle even when reading fails.
    f = gzip.open(data_path)
    try:
        raw = f.read()
    finally:
        f.close()

    # Rebinding drops the comma-separated copy before the text is parsed,
    # so only one large string is alive while the array is built.
    raw = raw.replace(",", " ")
    X = np.fromstring(raw, dtype=np.float64, sep=" ", count=-1)
    del raw
    X = X.reshape((581012, 55), order="C")

    # Split the table into labels (last column) and features (the rest).
    y = X[:, -1]
    X = X[:, :-1]

    # Deterministic shuffle of the row indices, then a train/test split.
    idx = np.arange(X.shape[0])
    np.random.seed(13)
    np.random.shuffle(idx)
    offset = 100000  # alternative split size noted by the author: 522911
    train_idx = idx[:offset]
    test_idx = idx[offset:]

    # Fancy indexing copies the rows; np.array(..., order="C") makes the
    # C-contiguous layout explicit.
    X_train = np.array(X[train_idx], order="C")
    y_train = np.array(y[train_idx], order="C")
    X_test = np.array(X[test_idx], order="C")
    y_test = np.array(y[test_idx], order="C")

    # free memory
    del X
    del y

    # Standardize only the first 10 feature columns using training-set
    # statistics; columns 10 and up are left untouched (mean 0, std 1).
    # NOTE(review): presumably the remaining columns are indicator
    # features that should not be rescaled -- confirm against the dataset.
    mean = X_train.mean(axis=0)
    std = X_train.std(axis=0)
    mean[10:] = 0.0
    std[10:] = 1.0
    X_train = (X_train - mean) / std
    X_test = (X_test - mean) / std
    del std
    del mean
    gc.collect()

    # Single-argument parenthesized prints emit the same text as the
    # original Python 2 print statements and also parse on Python 3.
    pid = current_process().pid
    print("master pid: %s" % pid)
    print("|X_train| = %s" % X_train.shape[0])
    print("|X_test| = %s" % X_test.shape[0])
    # True division here relies on `from __future__ import division`.
    print("X_train: %.2f MB" % (X_train.nbytes / 1024 / 1024))
    print("X_test: %.2f MB" % (X_test.nbytes / 1024 / 1024))

    # Mark the feature matrices read-only before handing them to workers.
    X_train.flags.writeable = False
    X_test.flags.writeable = False

    # NOTE(review): the reason for this pause is not visible here --
    # presumably it gives time to attach a memory monitor to the pid above.
    sleep(2)
    print("lets go!")

    # Run the same fit/score task 10 times across 2 jobs, passing the
    # arrays explicitly as arguments to each delayed call.
    scores = Parallel(n_jobs=2, verbose=1)(
        delayed(_score)(i, X_train, y_train, X_test, y_test)
        for i in range(10))
    print(np.mean(scores))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment