Sergey Feldman (sergeyf): public gists
import numpy as np
import matplotlib.pyplot as plt

mu, sigma = 3., 1.  # mean and standard deviation of the underlying normal
s = np.random.lognormal(mu, sigma, 10000)
log_s = np.log(s)

plt.subplot(211)
count, bins, _ = plt.hist(s, 100, density=True, align='mid')  # density replaces the removed normed flag
x = np.linspace(min(bins), max(bins), 10000)
pdf = (np.exp(-(np.log(x) - mu)**2 / (2 * sigma**2)) / (x * sigma * np.sqrt(2 * np.pi)))
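The preview cuts off here; a minimal completion sketch (this snippet appears to follow the numpy lognormal docs example) overlays the analytic pdf and shows that log(s) is normal in the lower panel:

# completion sketch, not in the truncated preview
plt.plot(x, pdf, linewidth=2, color='r')
plt.subplot(212)
plt.hist(log_s, 100, density=True, align='mid')
plt.show()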
sergeyf / igamma.py
An alternative parameterization of scipy's invgamma which uses the parameters 'alpha' and 'beta'. See https://en.wikipedia.org/wiki/Inverse_gamma for more info. The original is here: http://en.it-usenet.org/thread/16070/1596/#post1596
from scipy.stats import rv_continuous
from scipy.special import gammaln, gammaincinv, gammainc
from numpy import log, exp

class igamma_gen(rv_continuous):
    def _pdf(self, x, a, b):
        return exp(self._logpdf(x, a, b))

    def _logpdf(self, x, a, b):
        return a*log(b) - gammaln(a) - (a+1)*log(x) - b/x

    def _cdf(self, x, a, b):
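The preview truncates inside _cdf; a minimal completion sketch (the inverse-gamma CDF is the regularized upper incomplete gamma function at b/x), plus a hypothetical usage check against scipy's built-in invgamma:

        # completion sketch, not in the truncated preview:
        # P(X <= x) = 1 - P(a, b/x) for X ~ InvGamma(a, b)
        return 1.0 - gammainc(a, b / x)

# usage sketch: the two parameterizations agree when beta is passed as scale,
# e.g. igamma.pdf(2.0, 3.0, 1.5) == scipy.stats.invgamma.pdf(2.0, 3.0, scale=1.5)
igamma = igamma_gen(a=0.0, name='igamma')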

import numpy as np
from sklearn.feature_extraction import image
from sklearn.cluster import MiniBatchKMeans
from sklearn import model_selection, svm, datasets  # cross_validation was renamed to model_selection
from sklearn.datasets import fetch_olivetti_faces, fetch_openml  # fetch_mldata was removed from sklearn
import matplotlib.pyplot as pl
# histogram intersection kernel: K(x, y) = sum_i min(x_i, y_i)
def HIK_kernel(X, Y):
    return np.array([[np.sum(np.minimum(x, y)) for y in Y] for x in X])
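A callable kernel like this can be passed directly to sklearn's SVC, which calls it to build the Gram matrix; a usage sketch (not from the gist):

# usage sketch: sklearn accepts any callable that returns the (n_X, n_Y) Gram matrix
clf = svm.SVC(kernel=HIK_kernel)
# clf.fit(X_train, y_train); clf.predict(X_test)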
'''
Based on: https://gist.github.com/bshillingford/6259986edca707ca58dd
Modified to work on Windows by: Sergey Feldman
Jan 17, 2016
Requirements: pdflatex, bibtex
'''
import requests
import lxml.html as html

import numpy as np
from scipy.spatial.distance import pdist, squareform

# function that converts a categorical variable
# into a one-hot encoding
def one_hot_encoding(x):
    n = len(x)
    min_category = np.min(x)
    max_category = np.max(x)
    num_categories = max_category - min_category + 1
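The preview stops before the encoding matrix is built; a minimal completion sketch:

    # completion sketch, not in the truncated preview: build the indicator matrix
    one_hot = np.zeros((n, num_categories))
    one_hot[np.arange(n), x - min_category] = 1
    return one_hot

# e.g. one_hot_encoding(np.array([2, 3, 2])) -> [[1, 0], [0, 1], [1, 0]]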

import numpy as np

def f_of_x(X, w):
    n, d = X.shape
    X_dot_w = np.dot(X, w)
    y = np.zeros(n)
    # the inner product goes through a sin
    # or a cos, depending on a simple condition
    cos_flag = X[:, 0] < 0.0
    sin_flag = ~cos_flag
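The preview truncates here; completing the function as the comment describes (my sketch):

    # completion sketch: cos where the first feature is negative, sin elsewhere
    y[cos_flag] = np.cos(X_dot_w[cos_flag])
    y[sin_flag] = np.sin(X_dot_w[sin_flag])
    return y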

import numpy as np
import matplotlib.pyplot as plt
#from mpltools import style  # uncomment for prettier plots
#style.use(['ggplot'])

# generate all Bernoulli rewards ahead of time
def generate_bernoulli_bandit_data(num_samples, K):
    CTRs_that_generated_data = np.tile(np.random.rand(K), (num_samples, 1))
    true_rewards = np.random.rand(num_samples, K) < CTRs_that_generated_data
    return true_rewards, CTRs_that_generated_data
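A usage sketch (argument values mine):

# usage sketch: pregenerate 10000 rounds of a 5-armed Bernoulli bandit;
# true_rewards[t, k] is the reward arm k would have paid out at round t
true_rewards, true_CTRs = generate_bernoulli_bandit_data(10000, 5)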

import numpy as np

# the function
def f_of_x(X, w):
    n, d = X.shape
    X_dot_w = np.dot(X, w)
    y = np.zeros(n)
    # the inner product randomly goes through a sin
    # or a cos
    cos_flag = np.random.randn(n) < 0.0
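The preview truncates here; completing the function to match its comment (my sketch):

    # completion sketch: each row randomly goes through cos or sin
    sin_flag = ~cos_flag
    y[cos_flag] = np.cos(X_dot_w[cos_flag])
    y[sin_flag] = np.sin(X_dot_w[sin_flag])
    return y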
'''
References:
https://medium.com/teconomics-blog/using-ml-to-resolve-experiments-faster-bd8053ff602e
https://insightr.wordpress.com/2017/06/28/cross-fitting-double-machine-learning-estimator/
https://arxiv.org/pdf/1608.00060.pdf
'''
import numpy as np
from sklearn.linear_model import LassoCV, LinearRegression, BayesianRidge, LogisticRegression
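These imports set up a cross-fitting double-ML estimator; a minimal sketch of the idea (my code, assuming the partially linear model y = theta*T + g(X) + noise, with names of my choosing):

from sklearn.model_selection import KFold

def double_ml_theta(X, T, y, n_splits=2):
    # cross-fitting sketch: residualize y and T on X with nuisance models fit
    # on the other fold, then regress residual on residual to estimate theta
    theta_folds = []
    for train, test in KFold(n_splits=n_splits, shuffle=True).split(X):
        y_resid = y[test] - LassoCV().fit(X[train], y[train]).predict(X[test])
        T_resid = T[test] - LassoCV().fit(X[train], T[train]).predict(X[test])
        theta_folds.append(np.dot(T_resid, y_resid) / np.dot(T_resid, T_resid))
    return np.mean(theta_folds)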

import numpy as np

class Data_generator(object):
    def __init__(self, K, d, reward_type='binary'):
        self.d = d  # dimension of the feature vector
        self.K = K  # number of bandits
        self.reward_type = reward_type
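The class continues past the preview; an instantiation sketch (argument values mine):

# usage sketch: a 5-armed contextual bandit with 10-dimensional features
generator = Data_generator(K=5, d=10, reward_type='binary')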