Skip to content

Instantly share code, notes, and snippets.

@raamana
Last active August 8, 2016 19:29
Show Gist options
  • Save raamana/382927a527c87746b1ef to your computer and use it in GitHub Desktop.
Save raamana/382927a527c87746b1ef to your computer and use it in GitHub Desktop.
Test cases for reproducing joblib's memmap bug (in the context of scikit-learn's k-fold CV)
#!/usr/bin/env python
import sys
# from joblib import Parallel, delayed
from sklearn.externals.joblib import Parallel, delayed
from sklearn.model_selection import KFold
import numpy as np
def overlapping(test, train):
    """Count the distinct values that occur in both index collections.

    The result is symmetric in its two arguments, so the order in which
    the train and test indices are supplied does not matter.

    Parameters
    ----------
    test : array-like
        First collection of indices (e.g. the test fold).
    train : array-like
        Second collection of indices (e.g. the training fold).

    Returns
    -------
    int
        Number of unique values present in both inputs; 0 when the two
        collections are disjoint, which is what a correct K-fold split
        should always produce.
    """
    test = np.array(test)
    train = np.array(train)
    # intersect1d yields the sorted, unique common values as a 1-D array,
    # so its length is the number of shared indices.
    common = np.intersect1d(test, train)
    return common.shape[0]
if __name__ == '__main__':
    # Sweep over worker counts, fold counts and sample sizes. For every
    # combination, sum the train/test index overlap across all folds twice:
    # once with joblib's automatic memmapping triggered and once with it
    # disabled. Any non-zero sum means the folds reached the workers
    # corrupted, since K-fold train and test indices are disjoint.
    for num_jobs in range(1, 6):
        for num_folds in [3, 5, 7, 10, 15, 20]:
            for exp in range(2, 7):
                num_samples = 10**exp
                sys.stdout.write(
                    '#jobs: {0:2d}, K = {1:3d}, N = {2:10d} : '.format(
                        num_jobs, num_folds, num_samples))
                X = np.zeros(num_samples)
                y = np.zeros(num_samples)

                # max_nbytes=0: size threshold of zero bytes, so arrays
                # handed to the workers go through the memmap path.
                # KFold in sklearn.model_selection takes ``n_splits``
                # (``n_folds`` is rejected with a TypeError).
                k_fold = KFold(n_splits=num_folds, shuffle=False)
                inter = Parallel(n_jobs=num_jobs, verbose=0, max_nbytes=0)(
                    delayed(overlapping)(train, test)
                    for train, test in k_fold.split(X, y))
                sys.stdout.write(
                    'Intersection, with memmap : %10d, ' % np.sum(inter))

                # max_nbytes=None: memmapping of large arrays disabled.
                k_fold = KFold(n_splits=num_folds, shuffle=False)
                inter = Parallel(n_jobs=num_jobs, verbose=0, max_nbytes=None)(
                    delayed(overlapping)(train, test)
                    for train, test in k_fold.split(X, y))
                sys.stdout.write('no memmap : %10d' % np.sum(inter))

                # Terminate the report line for this combination; the
                # call form of print works on both Python 2 and 3.
                print(" ")
            # Separator line after each fold-count group.
            # NOTE(review): nesting level of this second print is inferred,
            # the original indentation was lost -- confirm.
            print(" ")
#jobs: 1, K = 3, N = 100 : Intersection, with memmap : 0, no memmap : 0
#jobs: 1, K = 3, N = 1000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 1, K = 3, N = 10000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 1, K = 3, N = 100000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 1, K = 3, N = 1000000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 1, K = 5, N = 100 : Intersection, with memmap : 0, no memmap : 0
#jobs: 1, K = 5, N = 1000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 1, K = 5, N = 10000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 1, K = 5, N = 100000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 1, K = 5, N = 1000000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 1, K = 7, N = 100 : Intersection, with memmap : 0, no memmap : 0
#jobs: 1, K = 7, N = 1000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 1, K = 7, N = 10000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 1, K = 7, N = 100000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 1, K = 7, N = 1000000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 1, K = 10, N = 100 : Intersection, with memmap : 0, no memmap : 0
#jobs: 1, K = 10, N = 1000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 1, K = 10, N = 10000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 1, K = 10, N = 100000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 1, K = 10, N = 1000000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 1, K = 15, N = 100 : Intersection, with memmap : 0, no memmap : 0
#jobs: 1, K = 15, N = 1000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 1, K = 15, N = 10000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 1, K = 15, N = 100000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 1, K = 15, N = 1000000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 1, K = 20, N = 100 : Intersection, with memmap : 0, no memmap : 0
#jobs: 1, K = 20, N = 1000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 1, K = 20, N = 10000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 1, K = 20, N = 100000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 1, K = 20, N = 1000000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 2, K = 3, N = 100 : Intersection, with memmap : 0, no memmap : 0
#jobs: 2, K = 3, N = 1000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 2, K = 3, N = 10000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 2, K = 3, N = 100000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 2, K = 3, N = 1000000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 2, K = 5, N = 100 : Intersection, with memmap : 0, no memmap : 0
#jobs: 2, K = 5, N = 1000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 2, K = 5, N = 10000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 2, K = 5, N = 100000 : Intersection, with memmap : 20000, no memmap : 0
#jobs: 2, K = 5, N = 1000000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 2, K = 7, N = 100 : Intersection, with memmap : 0, no memmap : 0
#jobs: 2, K = 7, N = 1000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 2, K = 7, N = 10000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 2, K = 7, N = 100000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 2, K = 7, N = 1000000 : Intersection, with memmap : 857143, no memmap : 0
#jobs: 2, K = 10, N = 100 : Intersection, with memmap : 0, no memmap : 0
#jobs: 2, K = 10, N = 1000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 2, K = 10, N = 10000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 2, K = 10, N = 100000 : Intersection, with memmap : 100000, no memmap : 0
#jobs: 2, K = 10, N = 1000000 : Intersection, with memmap : 400000, no memmap : 0
#jobs: 2, K = 15, N = 100 : Intersection, with memmap : 0, no memmap : 0
#jobs: 2, K = 15, N = 1000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 2, K = 15, N = 10000 : Intersection, with memmap : 8666, no memmap : 0
#jobs: 2, K = 15, N = 100000 : Intersection, with memmap : 6667, no memmap : 0
#jobs: 2, K = 15, N = 1000000 : Intersection, with memmap : 933333, no memmap : 0
#jobs: 2, K = 20, N = 100 : Intersection, with memmap : 0, no memmap : 0
#jobs: 2, K = 20, N = 1000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 2, K = 20, N = 10000 : Intersection, with memmap : 9000, no memmap : 0
#jobs: 2, K = 20, N = 100000 : Intersection, with memmap : 100000, no memmap : 0
#jobs: 2, K = 20, N = 1000000 : Intersection, with memmap : 1200000, no memmap : 0
#jobs: 3, K = 3, N = 100 : Intersection, with memmap : 0, no memmap : 0
#jobs: 3, K = 3, N = 1000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 3, K = 3, N = 10000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 3, K = 3, N = 100000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 3, K = 3, N = 1000000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 3, K = 5, N = 100 : Intersection, with memmap : 0, no memmap : 0
#jobs: 3, K = 5, N = 1000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 3, K = 5, N = 10000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 3, K = 5, N = 100000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 3, K = 5, N = 1000000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 3, K = 7, N = 100 : Intersection, with memmap : 0, no memmap : 0
#jobs: 3, K = 7, N = 1000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 3, K = 7, N = 10000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 3, K = 7, N = 100000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 3, K = 7, N = 1000000 : Intersection, with memmap : 142857, no memmap : 0
#jobs: 3, K = 10, N = 100 : Intersection, with memmap : 0, no memmap : 0
#jobs: 3, K = 10, N = 1000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 3, K = 10, N = 10000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 3, K = 10, N = 100000 : Intersection, with memmap : 100000, no memmap : 0
#jobs: 3, K = 10, N = 1000000 : Intersection, with memmap : 1800000, no memmap : 0
#jobs: 3, K = 15, N = 100 : Intersection, with memmap : 0, no memmap : 0
#jobs: 3, K = 15, N = 1000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 3, K = 15, N = 10000 : Intersection, with memmap : 8667, no memmap : 0
#jobs: 3, K = 15, N = 100000 : Intersection, with memmap : 6667, no memmap : 0
#jobs: 3, K = 15, N = 1000000 : Intersection, with memmap : 1733333, no memmap : 0
#jobs: 3, K = 20, N = 100 : Intersection, with memmap : 0, no memmap : 0
#jobs: 3, K = 20, N = 1000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 3, K = 20, N = 10000 : Intersection, with memmap : 9000, no memmap : 0
#jobs: 3, K = 20, N = 100000 : Intersection, with memmap : 185000, no memmap : 0
#jobs: 3, K = 20, N = 1000000 : Intersection, with memmap : 3000000, no memmap : 0
#jobs: 4, K = 3, N = 100 : Intersection, with memmap : 0, no memmap : 0
#jobs: 4, K = 3, N = 1000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 4, K = 3, N = 10000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 4, K = 3, N = 100000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 4, K = 3, N = 1000000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 4, K = 5, N = 100 : Intersection, with memmap : 0, no memmap : 0
#jobs: 4, K = 5, N = 1000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 4, K = 5, N = 10000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 4, K = 5, N = 100000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 4, K = 5, N = 1000000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 4, K = 7, N = 100 : Intersection, with memmap : 0, no memmap : 0
#jobs: 4, K = 7, N = 1000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 4, K = 7, N = 10000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 4, K = 7, N = 100000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 4, K = 7, N = 1000000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 4, K = 10, N = 100 : Intersection, with memmap : 0, no memmap : 0
#jobs: 4, K = 10, N = 1000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 4, K = 10, N = 10000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 4, K = 10, N = 100000 : Intersection, with memmap : 10000, no memmap : 0
#jobs: 4, K = 10, N = 1000000 : Intersection, with memmap : 900000, no memmap : 0
#jobs: 4, K = 15, N = 100 : Intersection, with memmap : 0, no memmap : 0
#jobs: 4, K = 15, N = 1000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 4, K = 15, N = 10000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 4, K = 15, N = 100000 : Intersection, with memmap : 86667, no memmap : 0
#jobs: 4, K = 15, N = 1000000 : Intersection, with memmap : 1733334, no memmap : 0
#jobs: 4, K = 20, N = 100 : Intersection, with memmap : 0, no memmap : 0
#jobs: 4, K = 20, N = 1000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 4, K = 20, N = 10000 : Intersection, with memmap : 9000, no memmap : 0
#jobs: 4, K = 20, N = 100000 : Intersection, with memmap : 185000, no memmap : 0
#jobs: 4, K = 20, N = 1000000 : Intersection, with memmap : 2800000, no memmap : 0
#jobs: 5, K = 3, N = 100 : Intersection, with memmap : 0, no memmap : 0
#jobs: 5, K = 3, N = 1000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 5, K = 3, N = 10000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 5, K = 3, N = 100000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 5, K = 3, N = 1000000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 5, K = 5, N = 100 : Intersection, with memmap : 0, no memmap : 0
#jobs: 5, K = 5, N = 1000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 5, K = 5, N = 10000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 5, K = 5, N = 100000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 5, K = 5, N = 1000000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 5, K = 7, N = 100 : Intersection, with memmap : 0, no memmap : 0
#jobs: 5, K = 7, N = 1000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 5, K = 7, N = 10000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 5, K = 7, N = 100000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 5, K = 7, N = 1000000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 5, K = 10, N = 100 : Intersection, with memmap : 0, no memmap : 0
#jobs: 5, K = 10, N = 1000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 5, K = 10, N = 10000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 5, K = 10, N = 100000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 5, K = 10, N = 1000000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 5, K = 15, N = 100 : Intersection, with memmap : 0, no memmap : 0
#jobs: 5, K = 15, N = 1000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 5, K = 15, N = 10000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 5, K = 15, N = 100000 : Intersection, with memmap : 6666, no memmap : 0
#jobs: 5, K = 15, N = 1000000 : Intersection, with memmap : 866667, no memmap : 0
#jobs: 5, K = 20, N = 100 : Intersection, with memmap : 0, no memmap : 0
#jobs: 5, K = 20, N = 1000 : Intersection, with memmap : 0, no memmap : 0
#jobs: 5, K = 20, N = 10000 : Intersection, with memmap : 9000, no memmap : 0
#jobs: 5, K = 20, N = 100000 : Intersection, with memmap : 180000, no memmap : 0
#jobs: 5, K = 20, N = 1000000 : Intersection, with memmap : 2000000, no memmap : 0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment