Skip to content

Instantly share code, notes, and snippets.

View amueller's full-sized avatar

Andreas Mueller amueller

View GitHub Profile
@amueller
amueller / gist:1351047
Created November 9, 2011 10:23
sklearn precomputed kernel example
from sklearn.datasets import load_digits
from sklearn.svm import SVC
from sklearn.utils import shuffle
from sklearn.metrics import zero_one_score
import numpy as np
# Load the digits dataset and shuffle its data and targets via sklearn.utils.shuffle.
digits = load_digits()
X, y = shuffle(digits.data, digits.target)
# The first 1000 shuffled rows become the training set; the remaining rows are the test set.
X_train, X_test = X[:1000, :], X[1000:, :]
@amueller
amueller / kneighbors_weired.py
Created January 23, 2012 21:30
Weird kneighbors behaviour
from sklearn import datasets, manifold
from sklearn.neighbors import NearestNeighbors
import numpy as np
# Experiment settings. The code that consumes them is not visible in this preview,
# so the notes below are inferred from the names only — TODO confirm.
n_points = 1000  # presumably the number of data points
n_neighbors = 10  # presumably the neighbor count for NearestNeighbors
out_dim = 2  # presumably the output/embedding dimensionality
n_trials = 100  # presumably the number of repetitions
@amueller
amueller / sklearn_cluster.py
Created January 30, 2012 16:18
Scikit-learn rocks the cluster!
import numpy as np
from IPython.parallel import Client
from sklearn.grid_search import GridSearchCV
from sklearn.cross_validation import KFold
from sklearn.svm import SVC
from sklearn import datasets
from sklearn.preprocessing import Scaler
from sklearn.utils import shuffle
@amueller
amueller / dpgmm_sampler.py
Created March 10, 2012 13:31
Nonparametric Gaussian mixture model data sampling
import numpy as np
import scipy.stats
class ChineseRestaurantProcess(object):
def __init__(self, alpha):
self.alpha = alpha
self.customers = []
def sample(self, n_samples=1):
samples = []
@amueller
amueller / mlp.py
Created March 17, 2012 15:59
Multi-Layer Perceptron for scikit-learn with SGD in Python
import numpy as np
import warnings
from itertools import cycle, izip
from sklearn.utils import gen_even_slices
from sklearn.utils import shuffle
from sklearn.base import BaseEstimator
from sklearn.base import ClassifierMixin
from sklearn.preprocessing import LabelBinarizer
@amueller
amueller / test_c.py
Created April 1, 2012 14:16
Testing influence of dataset size on C
import numpy as np
from sklearn import datasets
from sklearn.cross_validation import ShuffleSplit
from sklearn.grid_search import GridSearchCV
from sklearn.svm import SVC
from sklearn.preprocessing import Scaler
#data = datasets.load_digits()
data = datasets.fetch_mldata("usps")
@amueller
amueller / scale_c.py
Created April 10, 2012 21:45
scale_c test
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import LinearSVC
from sklearn.cross_validation import ShuffleSplit
from sklearn.grid_search import GridSearchCV
from sklearn import datasets
n_samples = 100
@amueller
amueller / cv_bug.py
Created April 17, 2012 11:51
weird bug
import numpy as np
from sklearn.grid_search import GridSearchCV
from sklearn.cross_validation import StratifiedKFold
from sklearn.datasets import load_iris
from sklearn.svm import LinearSVC
# Load the iris dataset and unpack its feature matrix and target labels.
iris = load_iris()
X, y = iris.data, iris.target
# Build a stratified K-fold splitter from the labels; the second positional
# argument (3) is presumably the number of folds in this sklearn version — confirm.
cv = StratifiedKFold(y, 3)
@amueller
amueller / learning_gabor_filters.py
Created April 19, 2012 11:39
Learning Gabor filters with scikit-learn and ICA or k-means
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_mldata
from sklearn.decomposition import FastICA, PCA
from sklearn.cluster import KMeans
# fetch natural image patches (the "natural scenes data" set, via fetch_mldata)
image_patches = fetch_mldata("natural scenes data")
# Keep only the data attribute of the fetched dataset as X.
X = image_patches.data
from sklearn.grid_search import GridSearchCV
from sklearn.cross_validation import StratifiedKFold
def main():
mnist = fetch_mldata("MNIST original")
X_all, y_all = mnist.data/255., mnist.target
print("scaling")
X = X_all[:60000, :]
y = y_all[:60000]