Gilles Louppe (glouppe)

SMILES,CHEM_ID
COC1:C:C(C2C3=C(CCCC3=O)N(C3:C:C:C(C):C:C:3)C3=C2C(=O)CCC3):C([N+](=O)[O-]):C:C:1OC,Chem_1
O=C1NC(N2CCCCC2)=NC1=CC1:C:C:C:S:1,Chem_2
COC1:C:C(C=C2C(=O)N(C(=O)C3:C:C:C(Cl):C:C:3)N=C2C):C:C(OC):C:1OC,Chem_3
CC#CC(O)(C(=O)OC1CCN(C)CC1)C1CCCCC1,Chem_4
COC1:C:C:C(N=C(C)C(C)=NC2:C:C:C(OC):C:C:2):C:C:1,Chem_5
CSC1:N:C(O):C(C#N):C(C2:C:C:C(C):C:C:2):N:1,Chem_6
CSC1:N:C(C2:C:C:C:C:C:2):N:C(N2CCOCC2):[S+]:1.[IH2+],Chem_7
CC1:C:C:C(C=C2N=C(NN=CC(O)C(O)C(O)CO)NC2=O):C:C:1,Chem_8
CN(C)C(C1=C(O)C(C2:C:C:C:C:C:2)N(C2:C:C:C:C:C:2)C1=O)N1CCOCC1,Chem_9
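A minimal sketch of loading a table in this SMILES,CHEM_ID format, assuming pandas and a file named chemicals.csv (the file name and the use of pandas are assumptions, not part of the gist):

import pandas as pd

# Read the two-column table shown above; "chemicals.csv" is an assumed file name.
df = pd.read_csv("chemicals.csv")
# Map each identifier to its SMILES string, e.g. smiles["Chem_2"].
smiles = dict(zip(df["CHEM_ID"], df["SMILES"]))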
import multiprocessing as mp
import queue
import threading


def buffered_gen_mp(source_gen, buffer_size=2):
    """
    Generator that runs a slow source generator in a separate process.
    buffer_size: the maximal number of items to pre-generate (length of the buffer)
    """
    if buffer_size < 2:
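The preview is cut off inside buffered_gen_mp. A minimal sketch of how the rest of the function could look, assuming a producer process feeding an mp.Queue and a None sentinel to signal exhaustion (the _producer helper, the error message, and the daemon flag are assumptions, not the gist's actual code):

import multiprocessing as mp

def buffered_gen_mp_sketch(source_gen, buffer_size=2):
    # One item is always "in flight" in the producer, so the effective buffer
    # is the queue (buffer_size - 1 slots) plus that in-flight item.
    if buffer_size < 2:
        raise RuntimeError("buffer_size must be at least 2")  # assumed guard
    buf = mp.Queue(maxsize=buffer_size - 1)

    def _producer(gen, q):
        for item in gen:
            q.put(item, block=True)  # blocks while the buffer is full
        q.put(None)  # sentinel: the source generator is exhausted

    # Passing a nested function and a generator to Process assumes the
    # 'fork' start method (i.e. Linux); spawn-based platforms cannot pickle them.
    mp.Process(target=_producer, args=(source_gen, buf), daemon=True).start()

    # Consume until the sentinel arrives.
    for item in iter(buf.get, None):
        yield item

Usage would look like: for batch in buffered_gen_mp_sketch(slow_loader(), buffer_size=4): ... so the next batch is prepared while the current one is being processed.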
import numpy as np
from sklearn.cross_validation import train_test_split
from sklearn.datasets import fetch_mldata
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import GradientBoostingClassifier
from rep.estimators import TMVAClassifier
from functools import partial
import sys
sys.path.append("/usr/lib/python2.7/dist-packages/")
sys.path.append("/usr/local/lib/python2.7/dist-packages/")
import string
import re
from joblib import Parallel, delayed
from invenio.dbquery import run_sql
from invenio.bibauthorid_dbinterface import get_title_of_paper
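The scikit-learn and REP imports a few lines above look like the setup for a classifier comparison. A minimal sketch of such a comparison using only the scikit-learn estimators and a synthetic dataset (make_classification, the hyperparameters, and the train/test split are assumptions; REP's TMVAClassifier follows the same fit/predict interface but its TMVA-specific configuration is omitted):

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split  # sklearn.cross_validation in the 2015-era imports above
from sklearn.ensemble import (RandomForestClassifier, ExtraTreesClassifier,
                              GradientBoostingClassifier)

# Synthetic stand-in for whatever dataset the original script used (assumption).
X, y = make_classification(n_samples=2000, n_features=20, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

# Fit each ensemble and report held-out accuracy.
for clf in (RandomForestClassifier(n_estimators=100, random_state=0),
            ExtraTreesClassifier(n_estimators=100, random_state=0),
            GradientBoostingClassifier(random_state=0)):
    clf.fit(X_train, y_train)
    print(type(clf).__name__, clf.score(X_test, y_test))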
glouppe / beard_disambiguation.py
Created January 7, 2015 15:49
Disambiguation prototype
import numpy as np
import argparse
import cPickle
import scipy.cluster.hierarchy as hac
from itertools import groupby
from itertools import product
from scipy.sparse import lil_matrix
from scipy.sparse import issparse
from scipy.spatial.distance import squareform
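These imports (scipy.cluster.hierarchy, squareform, sparse matrices) point at agglomerative clustering of author signatures from a pairwise distance matrix. A minimal sketch of that pattern, assuming a precomputed square distance matrix and an arbitrary cut threshold (the toy matrix, the "average" linkage, and the 0.5 threshold are assumptions):

import numpy as np
import scipy.cluster.hierarchy as hac
from scipy.spatial.distance import squareform

# Toy symmetric distance matrix between 4 signatures (assumed data).
D = np.array([[0.0, 0.1, 0.9, 0.8],
              [0.1, 0.0, 0.85, 0.95],
              [0.9, 0.85, 0.0, 0.2],
              [0.8, 0.95, 0.2, 0.0]])

# linkage() expects a condensed distance vector, hence squareform().
Z = hac.linkage(squareform(D), method="average")
# Cut the dendrogram at a distance threshold to get flat cluster labels.
labels = hac.fcluster(Z, t=0.5, criterion="distance")
print(labels)  # groups signatures 0-1 together and 2-3 together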
glouppe / nearest_developers.py
Last active December 23, 2015 21:39
Generate a sparse matrix such that rows = users, columns = filenames, and data[i, j] = the number of commits of user i on file j, then find the 3 nearest neighbors of each scikit-learn contributor.
import numpy as np
import os
from collections import defaultdict
from git import Repo
from scipy.sparse import csc_matrix
path = "/home/gilles/Sources/scikit-learn/sklearn/"
extensions = ["py", "pyx", "pxd"]
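A minimal sketch of the commits matrix and the neighbor search described in the gist description above, assuming GitPython's iter_commits()/commit.stats.files API and scikit-learn's NearestNeighbors (the counting loop, the index maps, and the neighbor query are assumptions about how the gist proceeds, not its code):

from collections import defaultdict
from git import Repo
from scipy.sparse import csc_matrix
from sklearn.neighbors import NearestNeighbors

repo = Repo("/home/gilles/Sources/scikit-learn/")  # repository containing the path above
extensions = ("py", "pyx", "pxd")
counts = defaultdict(int)  # (author name, file path) -> number of commits touching it

for commit in repo.iter_commits():
    for filename in commit.stats.files:
        if filename.rsplit(".", 1)[-1] in extensions:
            counts[(commit.author.name, filename)] += 1

users = sorted({u for u, _ in counts})
files = sorted({f for _, f in counts})
u_index = {u: i for i, u in enumerate(users)}
f_index = {f: j for j, f in enumerate(files)}

rows, cols, data = [], [], []
for (u, f), c in counts.items():
    rows.append(u_index[u])
    cols.append(f_index[f])
    data.append(c)
X = csc_matrix((data, (rows, cols)), shape=(len(users), len(files)))

# Ask for 4 neighbors: the closest hit for each user is the user themself.
nn = NearestNeighbors(n_neighbors=4).fit(X)
_, ind = nn.kneighbors(X)
for i, u in enumerate(users):
    print(u, "->", [users[j] for j in ind[i][1:]])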
import numpy
import random
from sklearn.datasets import fetch_mldata
mnist = fetch_mldata('MNIST original')
# Define training and testing sets
inds = numpy.arange(len(mnist.data))
test_i = random.sample(xrange(len(inds)), int(0.1*len(inds)))
train_i = numpy.delete(inds, test_i)
X_train = mnist.data[train_i].astype(numpy.double)
y_train = mnist.target[train_i].astype(numpy.double)
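The preview stops after the training arrays. A minimal sketch of how the matching test set and a quick baseline fit could follow, written against current scikit-learn and Python 3 (fetch_openml stands in for the long-removed fetch_mldata, and the RandomForest baseline is an assumption, not the gist's code):

import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.ensemble import RandomForestClassifier

mnist = fetch_openml("mnist_784", as_frame=False)
rng = np.random.RandomState(0)

inds = np.arange(len(mnist.data))
test_i = rng.choice(inds, size=int(0.1 * len(inds)), replace=False)
train_i = np.delete(inds, test_i)

X_train, y_train = mnist.data[train_i], mnist.target[train_i]
X_test, y_test = mnist.data[test_i], mnist.target[test_i]

# Assumed baseline: a small forest, just to exercise the split end to end.
clf = RandomForestClassifier(n_estimators=50, n_jobs=-1, random_state=0)
clf.fit(X_train, y_train)
print("test accuracy:", clf.score(X_test, y_test))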