Skip to content

Instantly share code, notes, and snippets.

#!/usr/bin/python
#
# K-means clustering using Lloyd's algorithm in pure Python.
# Written by Lars Buitinck. This code is in the public domain.
#
# The main program runs the clustering algorithm on a bunch of text documents
# specified as command-line arguments. These documents are first converted to
# sparse vectors, represented as lists of (index, value) pairs.
from collections import defaultdict
@mrgloom
mrgloom / gist:11046631
Created April 18, 2014 14:20
numpy.memmap test
#test saving and loading a numpy matrix
#test matrix multiplication in memory and using memmap
#with memmap there is no need for batch processing
#could also test hdf5 and pytables for matrix multiplication
#matrix multiplication can be used for pca (randomized pca is the smarter choice)
#needs to be tested on an x64 machine
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_mldata
from sklearn.decomposition import FastICA, PCA
from sklearn.cluster import KMeans
# fetch natural image patches
# NOTE(review): fetch_mldata was deprecated and later removed from
# scikit-learn; fetch_openml is the documented replacement. Confirm which
# dataset name to use there before running this.
image_patches = fetch_mldata("natural scenes data")
# X is the `data` attribute of the fetched dataset object.
X = image_patches.data
#test saving and loading a numpy matrix
#test matrix multiplication in memory and using memmap
#with memmap there is no need for batch processing
#could also test hdf5 and pytables for matrix multiplication
#matrix multiplication can be used for pca (randomized pca is the smarter choice)
#needs to be tested on an x64 machine
@mrgloom
mrgloom / gist:a8e329b7087281ec4f61
Created May 23, 2014 10:36
naive pca numpy implementation
def pca(data,k):
#data M x N
#get mean
mean= np.mean(data,axis=0) # N long
# print mean.shape
# print mean
#M x N
data_c= (data-mean)
print data_c.shape
@mrgloom
mrgloom / pca memmap
Created May 23, 2014 14:23
pca memmap
import numpy as np
import time
def read_data(path="data_3d.txt"):
    """Load the first three columns of a space-delimited text file.

    The first line of the file is skipped and the shape of the loaded
    array is printed before the array is returned.

    Parameters
    ----------
    path : str, optional
        File to read. Defaults to "data_3d.txt", the name that used to be
        hard-coded, so calls without an argument behave as before.

    Returns
    -------
    numpy.ndarray
        The values ``np.loadtxt`` parsed from columns 0, 1 and 2
        (M rows x N columns).
    """
    # M x N
    data = np.loadtxt(path, delimiter=" ", skiprows=1, usecols=(0, 1, 2))
    # Parenthesised print gives the same output on Python 2 and also runs
    # on Python 3, where the bare print statement is a syntax error.
    print(data.shape)
    return data
import numpy as np
import time
def read_data(path="data_3d.txt"):
    """Load the first three columns of a space-delimited text file.

    The first line of the file is skipped and the shape of the loaded
    array is printed before the array is returned.

    Parameters
    ----------
    path : str, optional
        File to read. Defaults to "data_3d.txt", the name that used to be
        hard-coded, so calls without an argument behave as before.

    Returns
    -------
    numpy.ndarray
        The values ``np.loadtxt`` parsed from columns 0, 1 and 2
        (M rows x N columns).
    """
    # M x N
    data = np.loadtxt(path, delimiter=" ", skiprows=1, usecols=(0, 1, 2))
    # Parenthesised print gives the same output on Python 2 and also runs
    # on Python 3, where the bare print statement is a syntax error.
    print(data.shape)
    return data
@mrgloom
mrgloom / svm.py
Created June 4, 2014 09:41 — forked from mblondel/svm.py
# Mathieu Blondel, September 2010
# License: BSD 3 clause
import numpy as np
from numpy import linalg
import cvxopt
import cvxopt.solvers
def linear_kernel(x1, x2):
    """Return the linear kernel of *x1* and *x2*, computed as ``np.dot(x1, x2)``."""
    return np.dot(x1, x2)
@mrgloom
mrgloom / CUR4FIC
Created July 24, 2014 13:30 — forked from goldingn/CUR4FIC
# clear the workspace (removes every object listed by ls())
rm(list = ls())
# load the relevant libraries
# install.packages("rCUR")
library(rCUR) # for CUR decomposition
# install.packages("irlba")
library(irlba) # for fast svd
@mrgloom
mrgloom / gist:3943410759f04265f7cb
Created November 13, 2014 09:43
Matlab SVD projection
>> a = [1 2 3; 2 5 7; 3 7 9]
a =
1 2 3
2 5 7
3 7 9
>> [U S V] = svd(a)