mrgloom

## pca memmap
import numpy as np
import time

def read_data(path="data_3d.txt"):
	"""Load the first three columns of a space-delimited text file.

	The first row of the file is skipped and only columns 0, 1 and 2 are
	read. The shape of the loaded array is printed before it is returned.

	Args:
		path: File to read. Defaults to "data_3d.txt", the file name the
			original implementation had hard-coded.

	Returns:
		The array produced by ``np.loadtxt`` (M rows x 3 columns for a file
		with M data rows).
	"""
	# M x N
	data = np.loadtxt(path, delimiter=" ", skiprows=1, usecols=(0, 1, 2))
	# Parenthesised form prints the same text on Python 2 and is valid on Python 3.
	print(data.shape)
	return data


## gist:a8e329b7087281ec4f61
def pca(data,k):
	"""Center ``data`` column-wise and print the shape of the centered array.

	Only the centering step is present in this function: the mean over
	axis 0 is subtracted from every row. Nothing is returned.

	Args:
		data: M x N array.
		k: Not used by the code shown here (presumably the number of
			components to keep -- TODO confirm against the full gist).
	"""
	# Mean over axis 0: N values long.
	mean = np.mean(data, axis=0)

	# M x N, every column now has zero mean.
	data_c = data - mean
	# Parenthesised form prints the same text on Python 2 and is valid on Python 3.
	print(data_c.shape)

## gist:91f494e0fbe3ce595d2a
#test save and load of numpy matrix
#test matrix multiplication in memory and using memmap

#in case of memmap no need to use batch processing

#also can test hdf5 and pytables for matrix mult

#can use matrix mult for pca (smarter to use randomized PCA)

#need to test it on x64 machine

## learning_gabor_filters.py
import numpy as np
import matplotlib.pyplot as plt

from sklearn.datasets import fetch_mldata
from sklearn.decomposition import FastICA, PCA
from sklearn.cluster import KMeans

# Fetch the "natural scenes data" dataset through fetch_mldata and keep its
# `.data` attribute as X (presumably one image patch per row -- TODO confirm).
image_patches = fetch_mldata("natural scenes data")
X = image_patches.data

## gist:11046631
#test save and load of numpy matrix
#test matrix multiplication in memory and using memmap

#in case of memmap no need to use batch processing

#also can test hdf5 and pytables for matrix mult

#can use matrix mult for pca (smarter to use randomized PCA)

#need to test it on x64 machine

## kmeans.py
#!/usr/bin/python
#
# K-means clustering using Lloyd's algorithm in pure Python.
# Written by Lars Buitinck. This code is in the public domain.
#
# The main program runs the clustering algorithm on a bunch of text documents
# specified as command-line arguments. These documents are first converted to
# sparse vectors, represented as lists of (index, value) pairs.

from collections import defaultdict

## gist:7783666
import numpy as np
from sklearn.svm import SVC
from sklearn.svm import LinearSVC
from sklearn.linear_model.stochastic_gradient import SGDClassifier
from sklearn.datasets import fetch_mldata
from sklearn.utils import shuffle
import time

#out-of-core \ online
#http://scikit-learn.org/stable/auto_examples/applications/plot_out_of_core_classification.html

## mnist_svm_sklearn.py
from sklearn.grid_search import GridSearchCV
from sklearn.cross_validation import StratifiedKFold

def main():
    """Fetch "MNIST original" and slice off its first 60000 samples.

    The feature matrix is divided by 255 before slicing and "scaling" is
    printed once that division has been done. The slices are kept in locals
    only; nothing is returned by the code shown here.
    """
    dataset = fetch_mldata("MNIST original")
    X_all = dataset.data/255.
    y_all = dataset.target
    print("scaling")
    # First 60000 rows of features and the matching targets.
    X = X_all[:60000, :]
    y = y_all[:60000]

## MNIST PCA projection
import numpy as np
import matplotlib.pyplot as plt
from itertools import product
from sklearn.decomposition import RandomizedPCA
from sklearn.datasets import fetch_mldata
from sklearn.utils import shuffle

# Use all digits: take the first 70000 samples, dividing the features by 255.
mnist = fetch_mldata("MNIST original")
X_train = mnist.data[:70000] / 255.
y_train = mnist.target[:70000]

## gist:6620665
import numpy as np
import matplotlib.pyplot as plt
from itertools import product
from sklearn.decomposition import RandomizedPCA
from sklearn.datasets import fetch_mldata
from sklearn.utils import shuffle

# Take the first 60000 samples, dividing the features by 255.
mnist = fetch_mldata("MNIST original")
X_train = mnist.data[:60000] / 255.
y_train = mnist.target[:60000]
	import numpy as np
	import time

	def read_data(path="data_3d.txt"):
		"""Load the first three columns of a space-delimited text file.

		The first row of the file is skipped and only columns 0, 1 and 2 are
		read. The shape of the loaded array is printed before it is returned.

		Args:
			path: File to read. Defaults to "data_3d.txt", the file name the
				original implementation had hard-coded.

		Returns:
			The array produced by ``np.loadtxt`` (M rows x 3 columns for a
			file with M data rows).
		"""
		# M x N
		data = np.loadtxt(path, delimiter=" ", skiprows=1, usecols=(0, 1, 2))
		# Parenthesised form prints the same text on Python 2 and is valid on Python 3.
		print(data.shape)
		return data
	def pca(data,k):
		"""Center ``data`` column-wise and print the shape of the centered array.

		Only the centering step is present in this function: the mean over
		axis 0 is subtracted from every row. Nothing is returned.

		Args:
			data: M x N array.
			k: Not used by the code shown here (presumably the number of
				components to keep -- TODO confirm against the full gist).
		"""
		# Mean over axis 0: N values long.
		mean = np.mean(data, axis=0)

		# M x N, every column now has zero mean.
		data_c = data - mean
		# Parenthesised form prints the same text on Python 2 and is valid on Python 3.
		print(data_c.shape)
	#test save and load of numpy matrix
	#test matrix multiplication in memory and using memmap

	#in case of memmap no need to use batch processing

	#also can test hdf5 and pytables for matrix mult

	#can use matrix mult for pca (smarter to use randomized PCA)

	#need to test it on x64 machine
	import numpy as np
	import matplotlib.pyplot as plt

	from sklearn.datasets import fetch_mldata
	from sklearn.decomposition import FastICA, PCA
	from sklearn.cluster import KMeans

	# Fetch the "natural scenes data" dataset through fetch_mldata and keep its
	# `.data` attribute as X (presumably one image patch per row -- TODO confirm).
	image_patches = fetch_mldata("natural scenes data")
	X = image_patches.data
	#!/usr/bin/python
	#
	# K-means clustering using Lloyd's algorithm in pure Python.
	# Written by Lars Buitinck. This code is in the public domain.
	#
	# The main program runs the clustering algorithm on a bunch of text documents
	# specified as command-line arguments. These documents are first converted to
	# sparse vectors, represented as lists of (index, value) pairs.

	from collections import defaultdict
	import numpy as np
	from sklearn.svm import SVC
	from sklearn.svm import LinearSVC
	from sklearn.linear_model.stochastic_gradient import SGDClassifier
	from sklearn.datasets import fetch_mldata
	from sklearn.utils import shuffle
	import time

	#out-of-core \ online
	#http://scikit-learn.org/stable/auto_examples/applications/plot_out_of_core_classification.html
	from sklearn.grid_search import GridSearchCV
	from sklearn.cross_validation import StratifiedKFold

	def main():
		"""Fetch "MNIST original" and slice off its first 60000 samples.

		The feature matrix is divided by 255 before slicing and "scaling" is
		printed once that division has been done. The slices are kept in
		locals only; nothing is returned by the code shown here.
		"""
		mnist = fetch_mldata("MNIST original")
		X_all, y_all = mnist.data/255., mnist.target
		print("scaling")
		# First 60000 rows of features and the matching targets.
		X = X_all[:60000, :]
		y = y_all[:60000]
	import numpy as np
	import matplotlib.pyplot as plt
	from itertools import product
	from sklearn.decomposition import RandomizedPCA
	from sklearn.datasets import fetch_mldata
	from sklearn.utils import shuffle

	# Use all digits: take the first 70000 samples, dividing the features by 255.
	mnist = fetch_mldata("MNIST original")
	X_train = mnist.data[:70000] / 255.
	y_train = mnist.target[:70000]