Andreas Mueller amueller

## precondition_logistic_regression.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                amueller
                / precondition_logistic_regression.ipynb
            
            
              Created
              November 12, 2019 21:00
            
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## mnist_kernel_approximation.py
  # Standard scientific Python imports
  import pylab as pl
  import numpy as np
  from time import time

  # Import datasets, classifiers and performance metrics
  from sklearn import datasets, svm, pipeline
  from sklearn.kernel_approximation import (RBFSampler,
                                            Nystroem)
  from sklearn.utils import shuffle

## mnist_svm_sklearn.py
from sklearn.grid_search import GridSearchCV
from sklearn.cross_validation import StratifiedKFold

def main():
    """Load the "MNIST original" dataset and slice off its first 60000 rows.

    Only the opening lines of this function are visible in this excerpt;
    ``fetch_mldata`` is not imported in the visible part either.
    """
    mnist = fetch_mldata("MNIST original")
    # Divide the data by 255 while unpacking data and target together.
    X_all, y_all = mnist.data/255., mnist.target
    print("scaling")
    # Keep the first 60000 samples and the matching targets.
    X = X_all[:60000, :]
    y = y_all[:60000]

## commits.py
import datetime
import os

from github import Github


# Read the API token from the environment so the credential does not have to
# be written into the source. The original literal is kept as the fallback,
# so behaviour is unchanged when GITHUB_TOKEN is not set.
gh = Github(os.environ.get("GITHUB_TOKEN", "SECRETKEY"))
rep = gh.get_repo("scikit-learn/scikit-learn")
org = gh.get_organization("scikit-learn")
# Materialise the member listing into a list once.
org_members = list(org.get_members())
# NOTE(review): presumably commit counts per contributor and a cut-off date;
# their use is outside the visible excerpt.
n_commits = {}
limit = datetime.datetime(2017, 1, 1)

## parsing_in_preparation.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                amueller
                / parsing_in_preparation.ipynb
            
            
              Created
              September 28, 2018 16:29
            
              
                parsing in preparation datasets on openml
              
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## student_groups.py
import cvxpy as cvx
import numpy as np

# Problem dimensions.
n_students, n_projects = 130, 30

# cvx.Int variable with one row per student and one column per project.
assignment = cvx.Int(rows=n_students, cols=n_projects)

# Seeded generator, so the random preference matrix is reproducible.
rng = np.random.RandomState(0)
project_preferences = rng.rand(n_students, n_projects)

## km_seg.py
import numpy as np


def km_segmentation(image, n_segments=100, ratio=50, max_iter=100):
    """Set up the regular grid used to initialise the segmentation.

    Only the grid initialisation is visible in this excerpt; ``ratio`` and
    ``max_iter`` are accepted but not used by the lines shown here.

    Parameters
    ----------
    image : object exposing ``.shape``
        The first two entries of ``image.shape`` are read as height and
        width.
    n_segments : int
        Desired number of segments; it determines the grid spacing.
    ratio, max_iter
        Not used in the visible part of the function.
    """
    # initialize on grid:
    height, width = image.shape[:2]
    # approximate grid size for desired n_segments. A slice step has to be an
    # integer (a float step raises TypeError) and non-zero (a zero step raises
    # ValueError), so truncate and clamp to at least 1.
    step = max(1, int(np.sqrt(height * width / n_segments)))
    grid_y, grid_x = np.mgrid[:height, :width]
    means_y = grid_y[::step, ::step]

## tree_plotting.py
import numpy as np
from numbers import Integral

from sklearn.externals import six
from sklearn.tree.export import _color_brew, _criterion, _tree


def plot_tree(decision_tree, max_depth=None, feature_names=None,
              class_names=None, label='all', filled=False,
              leaves_parallel=False, impurity=True, node_ids=False,

## bench_feat_agg.py
"""
Benchmarks np.bincount method vs np.mean for feature agglomeration in
../sklearn/cluster/_feature_agglomeration. Use of np.bincount provides
a significant speed up if the pooling function is np.mean.

np.bincount performs better especially as the size of X and n_clusters
increase.
"""
import matplotlib.pyplot as plt
import numpy as np

## sklearn_cluster.py
import numpy as np

from IPython.parallel import Client

from sklearn.grid_search import GridSearchCV
from sklearn.cross_validation import KFold
from sklearn.svm import SVC
from sklearn import datasets
from sklearn.preprocessing import Scaler
from sklearn.utils import shuffle
	# Standard scientific Python imports
	import pylab as pl
	import numpy as np
	from time import time

	# Import datasets, classifiers and performance metrics
	from sklearn import datasets, svm, pipeline
	from sklearn.kernel_approximation import (RBFSampler,
	Nystroem)
	from sklearn.utils import shuffle
	from sklearn.grid_search import GridSearchCV
	from sklearn.cross_validation import StratifiedKFold

	def main():
		"""Load the "MNIST original" dataset and slice off its first 60000 rows.

		Only the opening lines of this function are visible in this excerpt;
		``fetch_mldata`` is not imported in the visible part either.
		"""
		# The body is indented under the ``def`` so the function has a body.
		mnist = fetch_mldata("MNIST original")
		# Divide the data by 255 while unpacking data and target together.
		X_all, y_all = mnist.data/255., mnist.target
		print("scaling")
		# Keep the first 60000 samples and the matching targets.
		X = X_all[:60000, :]
		y = y_all[:60000]
	import datetime
	import os

	from github import Github


	# Read the API token from the environment so the credential does not have
	# to be written into the source. The original literal is kept as the
	# fallback, so behaviour is unchanged when GITHUB_TOKEN is not set.
	gh = Github(os.environ.get("GITHUB_TOKEN", "SECRETKEY"))
	rep = gh.get_repo("scikit-learn/scikit-learn")
	org = gh.get_organization("scikit-learn")
	# Materialise the member listing into a list once.
	org_members = list(org.get_members())
	# NOTE(review): presumably commit counts per contributor and a cut-off
	# date; their use is outside the visible excerpt.
	n_commits = {}
	limit = datetime.datetime(2017, 1, 1)
	import cvxpy as cvx
	import numpy as np

	# Problem dimensions.
	n_students, n_projects = 130, 30

	# cvx.Int variable with one row per student and one column per project.
	assignment = cvx.Int(rows=n_students, cols=n_projects)

	# Seeded generator, so the random preference matrix is reproducible.
	rng = np.random.RandomState(0)
	project_preferences = rng.rand(n_students, n_projects)
	import numpy as np


	def km_segmentation(image, n_segments=100, ratio=50, max_iter=100):
		"""Set up the regular grid used to initialise the segmentation.

		Only the grid initialisation is visible in this excerpt; ``ratio`` and
		``max_iter`` are accepted but not used by the lines shown here.

		Parameters
		----------
		image : object exposing ``.shape``
			The first two entries of ``image.shape`` are read as height and
			width.
		n_segments : int
			Desired number of segments; it determines the grid spacing.
		ratio, max_iter
			Not used in the visible part of the function.
		"""
		# initialize on grid:
		height, width = image.shape[:2]
		# approximate grid size for desired n_segments. A slice step has to be
		# an integer (a float step raises TypeError) and non-zero (a zero step
		# raises ValueError), so truncate and clamp to at least 1.
		step = max(1, int(np.sqrt(height * width / n_segments)))
		grid_y, grid_x = np.mgrid[:height, :width]
		means_y = grid_y[::step, ::step]
	import numpy as np
	from numbers import Integral

	from sklearn.externals import six
	from sklearn.tree.export import _color_brew, _criterion, _tree


	def plot_tree(decision_tree, max_depth=None, feature_names=None,
	class_names=None, label='all', filled=False,
	leaves_parallel=False, impurity=True, node_ids=False,
	"""
	Benchmarks np.bincount method vs np.mean for feature agglomeration in
	../sklearn/cluster/_feature_agglomeration. Use of np.bincount provides
	a significant speed up if the pooling function is np.mean.

	np.bincount performs better especially as the size of X and n_clusters
	increase.
	"""
	import matplotlib.pyplot as plt
	import numpy as np
	import numpy as np

	from IPython.parallel import Client

	from sklearn.grid_search import GridSearchCV
	from sklearn.cross_validation import KFold
	from sklearn.svm import SVC
	from sklearn import datasets
	from sklearn.preprocessing import Scaler
	from sklearn.utils import shuffle