Skip to content

Instantly share code, notes, and snippets.

@mrgloom
mrgloom / pca memmap
Created May 23, 2014 14:23
pca memmap
import numpy as np
import time
def read_data(path="data_3d.txt"):
    """Load the first three columns of a space-delimited text file.

    The first line of the file is skipped and only columns 0, 1 and 2 are
    read, so any additional columns are ignored.

    Args:
        path: File to read. Defaults to "data_3d.txt", the file this
            function always read before the parameter was added.

    Returns:
        The array produced by ``np.loadtxt``: M x 3 when the file holds
        more than one data row.
    """
    #M x N
    data = np.loadtxt(path, delimiter=" ", skiprows=1, usecols=(0, 1, 2))
    # The call form with a single argument prints the same thing on
    # Python 2 and Python 3; the bare print statement only parsed on 2.
    print(data.shape)
    return data
@mrgloom
mrgloom / gist:a8e329b7087281ec4f61
Created May 23, 2014 10:36
naive pca numpy implementation
def pca(data,k):
# Mean-centers `data` as the first step of a PCA.
# NOTE(review): only the centering step is visible in this excerpt; `k` is
# not used and nothing is returned in the lines shown.
#data M x N
#get mean
mean= np.mean(data,axis=0) # N long
# print mean.shape
# print mean
#M x N
# subtract the per-column mean from every row of data
data_c= (data-mean)
# NOTE(review): Python 2 print statement; this is a SyntaxError on Python 3.
print data_c.shape
# Test saving and loading a NumPy matrix.
# Test matrix multiplication both in memory and via numpy.memmap.
# With memmap there is no need for batch processing.
# HDF5 and PyTables could also be tested for matrix multiplication.
# Matrix multiplication can be used for PCA (though randomized PCA is the smarter choice).
# This needs to be tested on an x64 machine.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_mldata
from sklearn.decomposition import FastICA, PCA
from sklearn.cluster import KMeans
# fetch natural image patches
# NOTE(review): fetch_mldata was deprecated in scikit-learn 0.20 and removed
# in 0.22 — confirm the installed version or migrate to fetch_openml.
image_patches = fetch_mldata("natural scenes data")
# keep only the `data` attribute of the fetched dataset
X = image_patches.data
@mrgloom
mrgloom / gist:11046631
Created April 18, 2014 14:20
numpy.memmap test
# Test saving and loading a NumPy matrix.
# Test matrix multiplication both in memory and via numpy.memmap.
# With memmap there is no need for batch processing.
# HDF5 and PyTables could also be tested for matrix multiplication.
# Matrix multiplication can be used for PCA (though randomized PCA is the smarter choice).
# This needs to be tested on an x64 machine.
#!/usr/bin/python
#
# K-means clustering using Lloyd's algorithm in pure Python.
# Written by Lars Buitinck. This code is in the public domain.
#
# The main program runs the clustering algorithm on a bunch of text documents
# specified as command-line arguments. These documents are first converted to
# sparse vectors, represented as lists of (index, value) pairs.
from collections import defaultdict
@mrgloom
mrgloom / gist:7783666
Created December 4, 2013 07:34
MNIST classifier test with default params.
import numpy as np
from sklearn.svm import SVC
from sklearn.svm import LinearSVC
from sklearn.linear_model.stochastic_gradient import SGDClassifier
from sklearn.datasets import fetch_mldata
from sklearn.utils import shuffle
import time
#out-of-core \ online
#http://scikit-learn.org/stable/auto_examples/applications/plot_out_of_core_classification.html
from sklearn.grid_search import GridSearchCV
from sklearn.cross_validation import StratifiedKFold
def main():
# Fetch the "MNIST original" dataset; the feature matrix is divided by 255.
# as it is unpacked, and the targets are kept unchanged.
# NOTE(review): fetch_mldata was deprecated in scikit-learn 0.20 and removed
# in 0.22 — confirm the installed version or migrate to fetch_openml.
mnist = fetch_mldata("MNIST original")
X_all, y_all = mnist.data/255., mnist.target
# NOTE(review): this message is printed after the division above has already run.
print("scaling")
# Keep the first 60000 rows and their labels
# (presumably the training split — confirm against the rest of main()).
X = X_all[:60000, :]
y = y_all[:60000]
# NOTE(review): the remainder of main() is not visible in this excerpt.
@mrgloom
mrgloom / MNIST PCA projection
Last active May 7, 2020 12:18
MNIST PCA projection using scikit-learn.
import numpy as np
import matplotlib.pyplot as plt
from itertools import product
from sklearn.decomposition import RandomizedPCA
from sklearn.datasets import fetch_mldata
from sklearn.utils import shuffle
#use all digits
# NOTE(review): fetch_mldata was deprecated in scikit-learn 0.20 and removed
# in 0.22 — confirm the installed version or migrate to fetch_openml.
mnist = fetch_mldata("MNIST original")
# take the first 70000 rows; features are divided by 255., targets are left as-is
X_train, y_train = mnist.data[:70000] / 255., mnist.target[:70000]
import numpy as np
import matplotlib.pyplot as plt
from itertools import product
from sklearn.decomposition import RandomizedPCA
from sklearn.datasets import fetch_mldata
from sklearn.utils import shuffle
# NOTE(review): fetch_mldata was deprecated in scikit-learn 0.20 and removed
# in 0.22 — confirm the installed version or migrate to fetch_openml.
mnist = fetch_mldata("MNIST original")
# take the first 60000 rows; features are divided by 255., targets are left as-is
X_train, y_train = mnist.data[:60000] / 255., mnist.target[:60000]