Skip to content

Instantly share code, notes, and snippets.

View amueller's full-sized avatar

Andreas Mueller amueller

View GitHub Profile
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@amueller
amueller / mnist_kernel_approximation.py
Created November 27, 2012 19:41
mnist kernel approximation
# Standard scientific Python imports
import pylab as pl
import numpy as np
from time import time
# Import datasets, classifiers and performance metrics
from sklearn import datasets, svm, pipeline
from sklearn.kernel_approximation import (RBFSampler,
Nystroem)
from sklearn.utils import shuffle
from sklearn.grid_search import GridSearchCV
from sklearn.cross_validation import StratifiedKFold
def main():
mnist = fetch_mldata("MNIST original")
X_all, y_all = mnist.data/255., mnist.target
print("scaling")
X = X_all[:60000, :]
y = y_all[:60000]
@amueller
amueller / commits.py
Created October 26, 2018 19:26
list recent commits by author
# Script preamble: connect to the GitHub API and set up the state used to
# tally commits for the scikit-learn repository.
from github import Github
# NOTE(review): "SECRETKEY" looks like a placeholder for an access token --
# confirm a real credential is supplied from outside the source before running.
gh = Github("SECRETKEY")
# Handles for the repository and for the organization that owns it.
rep = gh.get_repo("scikit-learn/scikit-learn")
org = gh.get_organization("scikit-learn")
# Materialize the member listing into a plain list up front.
org_members = list(org.get_members())
import datetime
# Starts empty; presumably filled with per-author commit counts by code
# beyond this excerpt -- verify against the rest of the script.
n_commits = {}
# Cutoff timestamp (2017-01-01, naive datetime); presumably only commits
# after it are counted -- TODO confirm where `limit` is consumed.
limit = datetime.datetime(2017, 1, 1)
@amueller
amueller / parsing_in_preparation.ipynb
Created September 28, 2018 16:29
Parsing "in preparation" datasets on OpenML
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
# Set up what appears to be a student-to-project assignment problem:
# an integer decision variable plus randomly generated preference scores.
import cvxpy as cvx
n_students = 130
n_projects = 30
# Integer-valued variable with one entry per (student, project) pair.
# NOTE(review): cvx.Int(rows=..., cols=...) looks like the pre-1.0 cvxpy API;
# newer releases spell this cvx.Variable((rows, cols), integer=True) --
# confirm against the installed cvxpy version.
assignment = cvx.Int(rows=n_students, cols=n_projects)
import numpy as np
# Fixed seed so the generated preferences are reproducible across runs.
rng = np.random.RandomState(0)
# Random score in [0, 1) for every (student, project) pair,
# shape (n_students, n_projects).
project_preferences = rng.rand(n_students, n_projects)
@amueller
amueller / km_seg.py
Created June 19, 2012 21:54
Python Golf: k-means based image segmentation
import numpy as np
def km_segmentation(image, n_segments=100, ratio=50, max_iter=100):
# initialize on grid:
height, width = image.shape[:2]
# approximate grid size for desired n_segments
step = np.sqrt(height * width / n_segments)
grid_y, grid_x = np.mgrid[:height, :width]
means_y = grid_y[::step, ::step]
@amueller
amueller / tree_plotting.py
Created February 15, 2018 21:29
Stand-alone matplotlib based tree plotting from https://github.com/scikit-learn/scikit-learn/pull/9251
import numpy as np
from numbers import Integral
from sklearn.externals import six
from sklearn.tree.export import _color_brew, _criterion, _tree
def plot_tree(decision_tree, max_depth=None, feature_names=None,
class_names=None, label='all', filled=False,
leaves_parallel=False, impurity=True, node_ids=False,
@amueller
amueller / bench_feat_agg.py
Created October 27, 2017 17:37
bench feature agglomeration
"""
Benchmark the np.bincount method against np.mean for feature agglomeration in
../sklearn/cluster/_feature_agglomeration. Using np.bincount provides
a significant speedup when the pooling function is np.mean.
The advantage of np.bincount grows as the size of X and n_clusters
increase.
"""
import matplotlib.pyplot as plt
import numpy as np
@amueller
amueller / sklearn_cluster.py
Created January 30, 2012 16:18
Scikit-learn rocks the cluster!
import numpy as np
from IPython.parallel import Client
from sklearn.grid_search import GridSearchCV
from sklearn.cross_validation import KFold
from sklearn.svm import SVC
from sklearn import datasets
from sklearn.preprocessing import Scaler
from sklearn.utils import shuffle