Olivier Grisel ogrisel

## .gitignore
*.swp
*.pyc
*.png
data/*
build

## incoming-links.txt
# Sample single-pipeline Unix shell command to estimate the 10 most popular DBpedia resources
# by counting incoming links between matching Wikipedia articles

% time curl http://downloads.dbpedia.org/3.5/en/page_links_en.nt.bz2 \
  | bzcat \
  | head -1000000  \
  | sed -e 's/.*\/\(.*\)> \./\1/' \
  | sort \
  | uniq -c  \
  | sort -nr \

## .gitignore
*.pyc
mnist2500*

build/
pip-log.txt
text-documents/


## out.txt
asynchronous buffer forging
anonymous identity injection
asynchronous SQL skewing
synchronous buffer analysis
reverse jail fuzzing
tainted state inspection
multi-modal integrity recovery
deep state engineering
social state breaking
monotonic state forging

## couchdb-twitter-ec2-setup.sh
#!/bin/bash
# Install the system and Python packages listed below on an apt-based host.
# NOTE(review): the file name suggests this prepares an EC2 instance for a
# CouchDB + Twitter workflow — confirm the intended target before reuse.

# Refresh the apt package index so the install below resolves current versions.
sudo apt-get update
# Install the system packages; -y answers the confirmation prompt automatically.
sudo apt-get install -y byobu couchdb python-pip python-lxml

# Install the Python libraries system-wide; -U upgrades any already installed.
sudo pip install -U tweepy couchdbkit restkit

## enet_whitening.py
"""Evaluating the impact of PCA + whitening on low rank data"""
import numpy as np
from pprint import pprint

from scikits.learn.datasets.samples_generator import make_regression_dataset
from scikits.learn.pca import PCA
from scikits.learn.linear_model import ElasticNetCV

data_opts = {
    'n_train_samples': 5000,

## sparse_pca.py
import numpy as np, scipy, scipy.sparse, numpy.linalg, scipy.optimize
from scipy import weave


def project_l1(lbda, sigma):
    "Project positive vector lbda to have l1 norm sigma"
    ll = -np.sort(-lbda)
    cs = 0.
    theta = 0
    prevtheta = 0

## SparsePCA.py
import time
import sys

import numpy as np
from scipy import linalg
from scikits.learn.linear_model import Lasso, lars_path
from joblib import Parallel, delayed

################################################################################
# Utilities to spread load on CPUs

## .gitignore
build
*.so
*.prof

## README.md

      
              6 files
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                ogrisel
                / README.md
            
            
              Created
              September 7, 2011 22:36
            
              
                V-Measure and adjustment for chance
              
          
    This experiment highlights how the V-Measure of two independent uniform
labelings depends on the number of clusters when the number of samples is
finite.
Intuitively, it seems that for a finite number of samples the V-Measure
falls victim to a kind of birthday paradox that naive users might not be
aware of.
Even if the maximum number of clusters considered (e.g. 10) is small
with respect to the number of samples (e.g. 5000), the V-Measure of
	# Sample single piped shell unix commands to estimate the top 10 popular DBpedia resources
	# by counting incoming links between matching wikipedia articles

	% time curl http://downloads.dbpedia.org/3.5/en/page_links_en.nt.bz2 \
	\| bzcat \
	\| head -1000000 \
	\| sed -e 's/.\/\(.\)> \./\1/' \
	\| sort \
	\| uniq -c \
	\| sort -nr \
	asynchronous buffer forging
	anonymous identity injection
	asynchronous SQL skewing
	synchronous buffer analysis
	reverse jail fuzzing
	tainted state inspection
	multi-modal integrity recovery
	deep state engineering
	social state breaking
	monotonic state forging
	#!/bin/bash

	sudo apt-get update
	sudo apt-get install -y byobu couchdb python-pip python-lxml

	sudo pip install -U tweepy couchdbkit restkit
	"""Evaluating the impact of PCA + whitening on low rank data"""
	import numpy as np
	from pprint import pprint

	from scikits.learn.datasets.samples_generator import make_regression_dataset
	from scikits.learn.pca import PCA
	from scikits.learn.linear_model import ElasticNetCV

	data_opts = {
	'n_train_samples': 5000,
	import numpy as np, scipy, scipy.sparse, numpy.linalg, scipy.optimize
	from scipy import weave


	def project_l1(lbda, sigma):
	"Project positive vector lbda to have l1 norm sigma"
	ll = -np.sort(-lbda)
	cs = 0.
	theta = 0
	prevtheta = 0
	import time
	import sys

	import numpy as np
	from scipy import linalg
	from scikits.learn.linear_model import Lasso, lars_path
	from joblib import Parallel, delayed

	################################################################################
	# Utilities to spread load on CPUs