ibayer / cd_fast2.py
Created June 23, 2012 15:05
Test a pure-Python glmnet coordinate descent implementation against cd_fast.enet_coordinate_descent
import numpy as np

def fsign(f):
    # sign of f: 0, 1.0, or -1.0
    if f == 0:
        return 0
    elif f > 0:
        return 1.0
    else:
        return -1.0
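For context, fsign is the pure-Python stand-in for the sign function used in the coordinate descent update; its typical use is the soft-thresholding operator. A minimal sketch (soft_threshold is illustrative, not part of the gist):

def soft_threshold(value, threshold):
    # shrink value toward zero by threshold; the core lasso/enet update step
    return fsign(value) * max(abs(value) - threshold, 0)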
ibayer / cd_fast2.py
Created June 24, 2012 19:17 — forked from agramfort/cd_fast2.py
Test a pure-Python glmnet coordinate descent implementation against cd_fast.enet_coordinate_descent
import numpy as np
from cd_regression import enet_f

def fsign(f):
    # sign of f: 0, 1.0, or -1.0
    if f == 0:
        return 0
    elif f > 0:
        return 1.0
    else:
        return -1.0
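This fork pulls in enet_f, presumably the elastic net objective used to check that each sweep decreases the cost. A hedged sketch, assuming the usual objective 0.5 * ||y - Xw||^2 + l1_reg * ||w||_1 + 0.5 * l2_reg * ||w||^2 (the actual cd_regression.enet_f may differ):

import numpy as np

def enet_f(w, l2_reg, l1_reg, X, y):
    # elastic net objective value for the current coefficients w (assumed form)
    residual = y - np.dot(X, w)
    return (0.5 * np.dot(residual, residual)
            + l1_reg * np.abs(w).sum()
            + 0.5 * l2_reg * np.dot(w, w))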
ibayer / code_for_blogger.py
Created June 28, 2012 16:06
glmnet cd python implementation
import numpy as np

def enet_coordinate_descent2(w, l2_reg, l1_reg, X, y, max_iter):
    n_samples = X.shape[0]
    n_features = X.shape[1]
    # precompute column norms, X'y, and feature inner products for the updates
    norm_cols_X = (X ** 2).sum(axis=0)
    Xy = np.dot(X.T, y)
    gradient = np.zeros(n_features)
    feature_inner_product = np.zeros(shape=(n_features, n_features))
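The precomputed column norms, X'y, and feature inner products suggest glmnet's "covariance updates" variant, where each coordinate step needs only inner products rather than the full residual. A sketch of one such update under that assumption (coordinate_update is illustrative, not the gist's exact code):

import numpy as np

def coordinate_update(j, w, Xy, XtX, norm_cols_X, l1_reg, l2_reg):
    # correlation of feature j with the partial residual (excluding feature j itself)
    rho_j = Xy[j] - np.dot(XtX[j], w) + XtX[j, j] * w[j]
    # soft-thresholded update with the l2 penalty folded into the denominator
    return np.sign(rho_j) * max(abs(rho_j) - l1_reg, 0) / (norm_cols_X[j] + l2_reg)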
ibayer / cd_fast.prof
Created July 1, 2012 11:16
profiling cd_fast with yep and google-perftools
00110000-002a2000 r-xp 00000000 00:00 399583 /lib/i386-linux-gnu/libcrypto.so.1.0.0
002a2000-002b1000 r--p 00192000 00:00 399583 /lib/i386-linux-gnu/libcrypto.so.1.0.0
002b1000-002b8000 rw-p 001a1000 00:00 399583 /lib/i386-linux-gnu/libcrypto.so.1.0.0
002b8000-002bb000 rw-p 00000000 00:00 0
002bb000-0045a000 r-xp 00000000 00:00 394914 /lib/i386-linux-gnu/libc-2.15.so
0045a000-0045c000 r--p 0019f000 00:00 394914 /lib/i386-linux-gnu/libc-2.15.so
0045c000-0045d000 rw-p 001a1000 00:00 394914 /lib/i386-linux-gnu/libc-2.15.so
0045d000-00460000 rw-p 00000000 00:00 0
00460000-00566000 r-xp 00000000 00:00 3671697 /home/mane/virt_env/scikit-learn/lib/python2.7/site-packages/numpy/core/multiarray.so
00566000-00567000 r--p 00106000 00:00 3671697 /home/mane/virt_env/scikit-learn/lib/python2.7/site-packages/numpy/core/multiarray.so
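yep wraps the google-perftools CPU profiler so it can see into compiled extensions such as cd_fast. A typical invocation looks like this (a sketch; run_benchmark is a placeholder for the profiled workload):

import yep

yep.start('cd_fast.prof')   # start the google-perftools CPU profiler
run_benchmark()             # placeholder: the coordinate descent code under test
yep.stop()                  # flush cd_fast.prof for inspection with google-pprof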
ibayer / get_glmnet_timing_from_R.py
Created July 9, 2012 21:01
A few lines to get timings for the glmnet Fortran implementation via its R interface. These will later be used to compare it against the scikit-learn implementation.
import os

import rpy2.robjects as robjects
from rpy2.robjects import r

r_script_path = r"/home/mane/workspace/benchmark"

# set the working directory in R
robjects.r.setwd(r_script_path)

# load the R code
r.source(os.path.join(r_script_path, "time_glmnet_fit.R"))
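Assuming time_glmnet_fit.R defines an R function of the same name that returns the elapsed fit time, it can then be called through rpy2 (the function name and return shape are assumptions):

# hypothetical call into the sourced R code
time_glmnet_fit = robjects.r['time_glmnet_fit']
elapsed = time_glmnet_fit()
print(elapsed[0])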
ibayer / convert_R_sparse_data_to_mm.R
Created July 9, 2012 21:13
Testing how sparse datasets that are only available as RData files could be converted to the mldata.org HDF5 format. This does not work yet, as the HDF5 layout specified on mldata.org is not recognized by their parser.
library(Matrix)

load('InternetAd.RData')

# write the sparse design matrix in MatrixMarket format
file = file.path(getwd(), 'InternetAd.mtx')
writeMM(InternetAd$x, file=file)

# write the target vector, one value per line
file = file.path(getwd(), 'InternetAd.target')
write(InternetAd$y, file=file, ncolumns=1)
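The MatrixMarket and plain-text files written above can be read back on the Python side, e.g. as a staging step toward another format. A sketch using scipy:

import numpy as np
from scipy.io import mmread

X = mmread('InternetAd.mtx').tocsr()  # sparse design matrix
y = np.loadtxt('InternetAd.target')   # one target value per line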
ibayer / strong_rule_enet.py
Created July 25, 2012 20:41
strong rule for enet
import numpy as np
from scipy import linalg

MAX_ITER = 100

# cdef double l1_reg = alpha * rho * n_samples
# cdef double l2_reg = alpha * (1.0 - rho) * n_samples
def enet_coordinate_descent(X, y, alpha, rho, warm_start=None, max_iter=MAX_ITER):
    n_samples = X.shape[0]
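The strong rule screens out features that are very likely to have zero coefficients before coordinate descent starts. A sketch of the basic (non-sequential) rule from Tibshirani et al. (2012), which may differ in detail from the gist's implementation:

import numpy as np

def strong_rule_active_set(X, y, alpha):
    # keep feature j only if |x_j' y| >= 2 * alpha - alpha_max, where
    # alpha_max is the smallest penalty that zeroes every coefficient
    Xty = np.abs(np.dot(X.T, y))
    alpha_max = Xty.max()
    return np.where(Xty >= 2 * alpha - alpha_max)[0]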
ibayer / strong_rule_enet.py
Created July 27, 2012 13:50 — forked from agramfort/gist:3181189
strong rules lasso and enet
# -*- coding: utf-8 -*-
"""
Generalized linear models via coordinate descent
Author: Fabian Pedregosa <fabian@fseoane.net>
"""
import numpy as np
from scipy import linalg
ibayer / bench_enet_refactoring.py
Created August 3, 2012 14:07
bench enet refactoring
"""
Benchmarks of refactored against current enet implementation
First, we fix a training set and increase the number of
samples. Then we plot the computation time as function of
the number of samples.
In the second benchmark, we increase the number of dimensions of the
training set. Then we plot the computation time as function of
the number of dimensions.
"""
"""
Benchmarks of enet_coordinate_descent vs. enet_coordinate_descent
using the true solution as warm start.

First, we fix a training set and increase the number of samples.
Then we plot the computation time as a function of the number of samples.

In the second benchmark, we increase the number of dimensions of the
training set. Then we plot the computation time as a function of the
number of dimensions.
"""
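A minimal shape for such a benchmark (the fitted estimator and size grid are placeholders):

from time import time

import numpy as np

def bench(fit, n_samples_grid, n_features=500):
    # time fit(X, y) on random data of growing sample size
    times = []
    for n_samples in n_samples_grid:
        X = np.random.randn(n_samples, n_features)
        y = np.random.randn(n_samples)
        t0 = time()
        fit(X, y)
        times.append(time() - t0)
    return times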