Gilles Louppe (glouppe)

SMILES,CHEM_ID
COC1:C:C(C2C3=C(CCCC3=O)N(C3:C:C:C(C):C:C:3)C3=C2C(=O)CCC3):C([N+](=O)[O-]):C:C:1OC,Chem_1
O=C1NC(N2CCCCC2)=NC1=CC1:C:C:C:S:1,Chem_2
COC1:C:C(C=C2C(=O)N(C(=O)C3:C:C:C(Cl):C:C:3)N=C2C):C:C(OC):C:1OC,Chem_3
CC#CC(O)(C(=O)OC1CCN(C)CC1)C1CCCCC1,Chem_4
COC1:C:C:C(N=C(C)C(C)=NC2:C:C:C(OC):C:C:2):C:C:1,Chem_5
CSC1:N:C(O):C(C#N):C(C2:C:C:C(C):C:C:2):N:1,Chem_6
CSC1:N:C(C2:C:C:C:C:C:2):N:C(N2CCOCC2):[S+]:1.[IH2+],Chem_7
CC1:C:C:C(C=C2N=C(NN=CC(O)C(O)C(O)CO)NC2=O):C:C:1,Chem_8
CN(C)C(C1=C(O)C(C2:C:C:C:C:C:2)N(C2:C:C:C:C:C:2)C1=O)N1CCOCC1,Chem_9
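A minimal sketch of loading a table in this SMILES,CHEM_ID format, assuming pandas and a file named chemicals.csv (the file name and the use of pandas are assumptions, not part of the gist):

import pandas as pd

# Read the two-column table shown above; "chemicals.csv" is an assumed file name.
df = pd.read_csv("chemicals.csv")
# Map each identifier to its SMILES string, e.g. smiles["Chem_2"].
smiles = dict(zip(df["CHEM_ID"], df["SMILES"]))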
import multiprocessing as mp
import queue
import threading


def buffered_gen_mp(source_gen, buffer_size=2):
    """
    Generator that runs a slow source generator in a separate process.
    buffer_size: the maximal number of items to pre-generate (length of the buffer)
    """
    if buffer_size < 2:
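The preview is cut off inside buffered_gen_mp. A minimal sketch of how the rest of the function could look, assuming a producer process feeding an mp.Queue and a None sentinel to signal exhaustion (the _producer helper, the error message, and the daemon flag are assumptions, not the gist's actual code):

import multiprocessing as mp

def buffered_gen_mp_sketch(source_gen, buffer_size=2):
    # One item is always "in flight" in the producer, so the effective buffer
    # is the queue (buffer_size - 1 slots) plus that in-flight item.
    if buffer_size < 2:
        raise RuntimeError("buffer_size must be at least 2")  # assumed guard
    buf = mp.Queue(maxsize=buffer_size - 1)

    def _producer(gen, q):
        for item in gen:
            q.put(item, block=True)  # blocks while the buffer is full
        q.put(None)  # sentinel: the source generator is exhausted

    # Passing a nested function and a generator to Process assumes the
    # 'fork' start method (i.e. Linux); spawn-based platforms cannot pickle them.
    mp.Process(target=_producer, args=(source_gen, buf), daemon=True).start()

    # Consume until the sentinel arrives.
    for item in iter(buf.get, None):
        yield item

Usage would look like: for batch in buffered_gen_mp_sketch(slow_loader(), buffer_size=4): ... so the next batch is prepared while the current one is being processed.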
import numpy as np
from sklearn.cross_validation import train_test_split
from sklearn.datasets import fetch_mldata
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import GradientBoostingClassifier
from rep.estimators import TMVAClassifier
from functools import partial
import sys
sys.path.append("/usr/lib/python2.7/dist-packages/")
sys.path.append("/usr/local/lib/python2.7/dist-packages/")
import string
import re
from joblib import Parallel, delayed
from invenio.dbquery import run_sql
from invenio.bibauthorid_dbinterface import get_title_of_paper
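The scikit-learn and REP imports a few lines above look like the setup for a classifier comparison. A minimal sketch of such a comparison using only the scikit-learn estimators and a synthetic dataset (make_classification, the hyperparameters, and the train/test split are assumptions; REP's TMVAClassifier follows the same fit/predict interface but its TMVA-specific configuration is omitted):

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split  # sklearn.cross_validation in the 2015-era imports above
from sklearn.ensemble import (RandomForestClassifier, ExtraTreesClassifier,
                              GradientBoostingClassifier)

# Synthetic stand-in for whatever dataset the original script used (assumption).
X, y = make_classification(n_samples=2000, n_features=20, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

# Fit each ensemble and report held-out accuracy.
for clf in (RandomForestClassifier(n_estimators=100, random_state=0),
            ExtraTreesClassifier(n_estimators=100, random_state=0),
            GradientBoostingClassifier(random_state=0)):
    clf.fit(X_train, y_train)
    print(type(clf).__name__, clf.score(X_test, y_test))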
glouppe / beard_disambiguation.py
Created January 7, 2015 15:49
Disambiguation prototype
import numpy as np
import argparse
import cPickle
import scipy.cluster.hierarchy as hac
from itertools import groupby
from itertools import product
from scipy.sparse import lil_matrix
from scipy.sparse import issparse
from scipy.spatial.distance import squareform
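These imports (scipy.cluster.hierarchy, squareform, sparse matrices) point at agglomerative clustering of author signatures from a pairwise distance matrix. A minimal sketch of that pattern, assuming a precomputed square distance matrix and an arbitrary cut threshold (the toy matrix, the "average" linkage, and the 0.5 threshold are assumptions):

import numpy as np
import scipy.cluster.hierarchy as hac
from scipy.spatial.distance import squareform

# Toy symmetric distance matrix between 4 signatures (assumed data).
D = np.array([[0.0, 0.1, 0.9, 0.8],
              [0.1, 0.0, 0.85, 0.95],
              [0.9, 0.85, 0.0, 0.2],
              [0.8, 0.95, 0.2, 0.0]])

# linkage() expects a condensed distance vector, hence squareform().
Z = hac.linkage(squareform(D), method="average")
# Cut the dendrogram at a distance threshold to get flat cluster labels.
labels = hac.fcluster(Z, t=0.5, criterion="distance")
print(labels)  # groups signatures 0-1 together and 2-3 together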
glouppe / nearest_developers.py
Last active December 23, 2015 21:39
Generate a sparse matrix such that rows = users, columns = filenames, and data[i, j] = the number of commits of user i on file j, then find the 3 nearest neighbors of each scikit-learn contributor.
import numpy as np
import os
from collections import defaultdict
from git import Repo
from scipy.sparse import csc_matrix
path = "/home/gilles/Sources/scikit-learn/sklearn/"
extensions = ["py", "pyx", "pxd"]
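A minimal sketch of the commits matrix and the neighbor search described in the gist description above, assuming GitPython's iter_commits()/commit.stats.files API and scikit-learn's NearestNeighbors (the counting loop, the index maps, and the neighbor query are assumptions about how the gist proceeds, not its code):

from collections import defaultdict
from git import Repo
from scipy.sparse import csc_matrix
from sklearn.neighbors import NearestNeighbors

repo = Repo("/home/gilles/Sources/scikit-learn/")  # repository containing the path above
extensions = ("py", "pyx", "pxd")
counts = defaultdict(int)  # (author name, file path) -> number of commits touching it

for commit in repo.iter_commits():
    for filename in commit.stats.files:
        if filename.rsplit(".", 1)[-1] in extensions:
            counts[(commit.author.name, filename)] += 1

users = sorted({u for u, _ in counts})
files = sorted({f for _, f in counts})
u_index = {u: i for i, u in enumerate(users)}
f_index = {f: j for j, f in enumerate(files)}

rows, cols, data = [], [], []
for (u, f), c in counts.items():
    rows.append(u_index[u])
    cols.append(f_index[f])
    data.append(c)
X = csc_matrix((data, (rows, cols)), shape=(len(users), len(files)))

# Ask for 4 neighbors: the closest hit for each user is the user themself.
nn = NearestNeighbors(n_neighbors=4).fit(X)
_, ind = nn.kneighbors(X)
for i, u in enumerate(users):
    print(u, "->", [users[j] for j in ind[i][1:]])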
import numpy
import random
from sklearn.datasets import fetch_mldata
mnist = fetch_mldata('MNIST original')
# Define training and testing sets
inds = numpy.arange(len(mnist.data))
test_i = random.sample(xrange(len(inds)), int(0.1*len(inds)))
train_i = numpy.delete(inds, test_i)
X_train = mnist.data[train_i].astype(numpy.double)
y_train = mnist.target[train_i].astype(numpy.double)
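The preview stops after the training arrays. A minimal sketch of how the matching test set and a quick baseline fit could follow, written against current scikit-learn and Python 3 (fetch_openml stands in for the long-removed fetch_mldata, and the RandomForest baseline is an assumption, not the gist's code):

import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.ensemble import RandomForestClassifier

mnist = fetch_openml("mnist_784", as_frame=False)
rng = np.random.RandomState(0)

inds = np.arange(len(mnist.data))
test_i = rng.choice(inds, size=int(0.1 * len(inds)), replace=False)
train_i = np.delete(inds, test_i)

X_train, y_train = mnist.data[train_i], mnist.target[train_i]
X_test, y_test = mnist.data[test_i], mnist.target[test_i]

# Assumed baseline: a small forest, just to exercise the split end to end.
clf = RandomForestClassifier(n_estimators=50, n_jobs=-1, random_state=0)
clf.fit(X_train, y_train)
print("test accuracy:", clf.score(X_test, y_test))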