@oddskool
oddskool / c2st.py
Created May 17, 2018 09:09
Classifier Two Sample Tests (C2ST)
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import hamming_loss
def c2st(X, y, clf=LogisticRegression(), loss=hamming_loss, bootstraps=300):
"""
Perform Classifier Two Sample Test (C2ST) [1].
This test estimates whether a target is predictable from the features by comparing the loss of a classifier
trained on the true target with the distribution of losses of classifiers trained on random targets with the same average.
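The preview cuts off after the docstring. A minimal sketch of how the body could look, assuming permuted labels as the "random target with the same average" and training-set loss throughout; a reconstruction, not the gist's actual code:

import numpy as np
from sklearn.base import clone
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import hamming_loss

def c2st(X, y, clf=LogisticRegression(), loss=hamming_loss, bootstraps=300):
    """Perform Classifier Two Sample Test (C2ST) -- sketch."""
    # Loss of a classifier trained on the true target.
    true_loss = loss(y, clone(clf).fit(X, y).predict(X))
    rng = np.random.RandomState(0)
    null_losses = []
    for _ in range(bootstraps):
        # Permuting y preserves the label average (an assumption about
        # how the gist draws its random targets).
        y_perm = rng.permutation(y)
        null_losses.append(loss(y_perm, clone(clf).fit(X, y_perm).predict(X)))
    # p-value: fraction of random-target losses at least as small as the
    # true-target loss; small values mean y is predictable from X.
    return float(np.mean(np.array(null_losses) <= true_loss))

A more careful version would evaluate each classifier on held-out data rather than on the training set.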
@oddskool
oddskool / gist:27476a1e22df357de798
Last active January 20, 2023 17:07
load CSV data to CSR matrix
import array
import csv
import numpy as np
from scipy.sparse import csr_matrix
def csv_to_csr(f):
"""Read content of CSV file f, return as CSR matrix."""
data = array.array("f")
indices = array.array("i")
indptr = array.array("i", [0])
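The preview stops after the buffer setup. One way the function could continue, assuming each CSV row holds the dense float values of one matrix row (the gist's real input format may differ):

import array
import csv
import numpy as np
from scipy.sparse import csr_matrix

def csv_to_csr(f):
    """Read content of CSV file f, return as CSR matrix."""
    data = array.array("f")
    indices = array.array("i")
    indptr = array.array("i", [0])
    for row in csv.reader(f):
        for j, value in enumerate(row):
            v = float(value)
            if v != 0:  # store only the non-zero entries
                data.append(v)
                indices.append(j)
        # Each row ends where its last stored entry ends.
        indptr.append(len(indices))
    n_cols = max(indices) + 1 if len(indices) else 0
    return csr_matrix(
        (np.frombuffer(data, dtype=np.float32),
         np.frombuffer(indices, dtype=np.int32),
         np.frombuffer(indptr, dtype=np.int32)),
        shape=(len(indptr) - 1, n_cols),
    )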
@oddskool
oddskool / ddm.ipynb
Created July 17, 2014 07:38
Drop Detection Methods
@oddskool
oddskool / gist:8824062
Created February 5, 2014 13:56
Julia tutorial
{
"metadata": {
"language": "Julia",
"name": ""
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
@oddskool
oddskool / gist:7300982
Last active December 27, 2015 08:59
Benchmark SGD prediction time with dense/sparse coefficients. Invoke with: $ kernprof.py -l sparsity_benchmark.py && python -m line_profiler sparsity_benchmark.py.lprof
from scipy.sparse import csr_matrix, issparse
import sys
import numpy as np
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import r2_score
np.random.seed(42)
n_samples, n_features = 300, 30
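The preview ends at the setup. A minimal sketch of the comparison the description makes, using scikit-learn's sparsify()/densify() to switch the coefficient representation; timeit stands in here for the gist's line_profiler instrumentation:

import timeit
import numpy as np
from sklearn.linear_model import SGDRegressor

np.random.seed(42)
n_samples, n_features = 300, 30

X = np.random.randn(n_samples, n_features)
# Sparse ground-truth coefficients: most features are irrelevant.
coef = np.zeros(n_features)
coef[:5] = np.random.randn(5)
y = X @ coef

# An L1 penalty drives many learned coefficients to exactly zero.
clf = SGDRegressor(penalty="l1", alpha=0.1, max_iter=1000).fit(X, y)

clf.densify()  # coef_ stored as a dense ndarray
dense_t = timeit.timeit(lambda: clf.predict(X), number=1000)
clf.sparsify()  # coef_ stored as a scipy.sparse matrix
sparse_t = timeit.timeit(lambda: clf.predict(X), number=1000)
print("dense: %.3fs  sparse: %.3fs" % (dense_t, sparse_t))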
@oddskool
oddskool / gist:6509465
Created September 10, 2013 13:33
Sums the size of subdirs in an S3 bucket (and per storage class)
import sys
import boto
from collections import defaultdict
s3 = boto.connect_s3()
bucket = s3.lookup(sys.argv[1])
total_bytes = defaultdict(int)
def process(key):
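The preview cuts off at process. A sketch of how the accumulation could continue with the boto 2 API; the per-subdir/per-storage-class breakdown is reconstructed from the description rather than taken from the gist:

import sys
from collections import defaultdict
import boto

s3 = boto.connect_s3()
bucket = s3.lookup(sys.argv[1])
total_bytes = defaultdict(int)

def process(key):
    # The top-level "subdir" is everything before the first "/" in the key name.
    subdir = key.name.split("/", 1)[0]
    total_bytes[(subdir, key.storage_class)] += key.size

for key in bucket.list():
    process(key)

for (subdir, storage_class), nbytes in sorted(total_bytes.items()):
    print("%s\t%s\t%.1f GB" % (subdir, storage_class, nbytes / 1e9))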
@oddskool
oddskool / parse_aws_s3_billing.py
Created September 10, 2013 07:00
Simplistic script to parse the detailed AWS billing CSV file. The script displays the cost of S3 operations broken down per region, bucket, and usage type (either storage or network). It also sums up the amount of storage used per bucket. Output is filtered to drop line items costing less than $1. See http://docs.aws.amazon.com/awsaccountbilling/latest/about/programaccess.html
# -*- coding:utf-8 -*-
'''
Simplistic script to parse the detailed AWS billing CSV file.
Script displays cost of S3 operations broken down per region, bucket and usage
type (either storage or network). It also sums up the amount of storage used per bucket.
Output is filtered to drop line items costing less than $1.
See http://docs.aws.amazon.com/awsaccountbilling/latest/about/programaccess.html for
how to set up programmatic access to your billing.
'''
from collections import defaultdict
import re
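A sketch of the parsing loop the description implies; the column names follow the legacy detailed billing report format and should be treated as assumptions:

import csv
from collections import defaultdict

def s3_costs(path):
    """Sum S3 costs per (usage type, resource) from a detailed billing CSV."""
    costs = defaultdict(float)
    with open(path) as f:
        for row in csv.DictReader(f):
            if row.get("ProductName") != "Amazon Simple Storage Service":
                continue
            # UsageType encodes region and storage-vs-network usage,
            # e.g. "EUC1-TimedStorage-ByteHrs"; ResourceId names the bucket.
            key = (row.get("UsageType", ""), row.get("ResourceId", ""))
            costs[key] += float(row.get("Cost") or 0)
    # Drop line items costing less than $1, as the gist's output does.
    return {k: v for k, v in costs.items() if v >= 1.0}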
@oddskool
oddskool / gist:5249033
Created March 26, 2013 20:39
epsilon greedy algorithm
import random
class EpsilonGreedyBandit(Bandit):
"""
The best action (as far as the algorithm can tell so far) is selected for
a proportion 1 - \epsilon of the trials, and another action is randomly
selected (with uniform probability) for a proportion \epsilon.
Parameters
----------
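The preview ends inside the docstring and the Bandit base class is not shown. A self-contained sketch of the selection rule the docstring describes; attribute and method names are placeholders, not the gist's:

import random

class EpsilonGreedyBandit(object):
    """Select the empirically best action with probability 1 - epsilon
    and a uniformly random action with probability epsilon."""

    def __init__(self, n_actions, epsilon=0.1):
        self.epsilon = epsilon
        self.counts = [0] * n_actions    # pulls per action
        self.values = [0.0] * n_actions  # running mean reward per action

    def select_action(self):
        if random.random() < self.epsilon:
            return random.randrange(len(self.values))  # explore
        return self.values.index(max(self.values))     # exploit (ties -> first)

    def update(self, action, reward):
        # Incremental running mean: v <- v + (r - v) / n
        self.counts[action] += 1
        self.values[action] += (reward - self.values[action]) / self.counts[action]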