Wesley Tansey tansey

## median_deviations.py
import numpy as np
import numpy.ma as ma


def cap_outliers(points, thresh=3.5, data=None, median=None, med_abs_deviation=None):
    '''
    Cap outliers to be within a certain number of median deviations.
    '''
    if type(points) is np.float64:
        points = np.array([points])

## pav.py
import numpy as np

def pav(y):
    """
    PAV uses the pair adjacent violators method to produce a monotonic
    smoothing of y

    translated from matlab by Sean Collins (2006) as part of the EMAP toolbox
    Author : Alexandre Gramfort
    license : BSD

## aicc_select.py
import matplotlib.pyplot as plt
import numpy as np
import statsmodels.api as sm


def generalized_liang_sim_xy(N=500, P=500, S=100):
    '''Generates data from a simple linear model'''
    X = (np.random.normal(size=(N,1)) + np.random.normal(size=(N,P))) / 2.
    w0 = np.random.normal(1, size=S//4)
    w1 = np.random.normal(2, size=S//4)

## binary_matrix_factorization.py
import numpy as np
from functools import partial
from scipy.optimize import fmin_l_bfgs_b
from sklearn.linear_model import LogisticRegression


def binary_mf(Y, nembeds, lam=None, lams=30, cv=5, max_steps=30, tol=1e-4, verbose=False):
    # Convert to a log-space grid
    if lam is None and isinstance(lams, int):
        lams = np.exp(np.linspace(np.log(1e-2), np.log(1), lams))

## knockoffs.py
'''
An O(n log n) time implementation of the knockoff filter.

Author: Wesley Tansey
Date: 3/27/2020
'''
import numpy as np

def knockoff_filter(knockoff_stats, alpha, offset=1.0, is_sorted=False):
    '''Perform the knockoffs selection procedure at the target FDR threshold.

## multifactor.py
'''
Heterogeneous factor modeling.

This model fits a heterogeneous factor model where columns may be:
1) Binary
2) Categorical
3) Gaussian

Everything is fit via alternating minimization and stochastic gradient descent.
The code relies on pytorch for SGD and a demo is included.

## factor_pav.py
'''Pool adjacent violators algorithm for (column-)monotone matrix factorization.

Applies the PAV algorithm to column factors of a matrix factorization:
Given: M = W.V'
Returns: V_proj, a projected version of V such that M[i] is monotone decreasing
for all i.

Author: Wesley Tansey
Date: May 2019
'''

## fast_mvn.py
'''Fast sampling from a multivariate normal with covariance or precision
    parameterization. Supports sparse arrays. Params:
        - mu: If provided, assumes the model is N(mu, Q)
        - mu_part: If provided, assumes the model is N(Q mu_part, Q).
                    This is common in many conjugate Gibbs steps.
        - sparse: If true, assumes we are working with a sparse Q
        - precision: If true, assumes Q is a precision matrix (inverse covariance)
        - chol_factor: If true, assumes Q is a (lower triangular) Cholesky
                        decomposition of the covariance matrix
                        (or of the precision matrix if precision=True).

## nurse_schedules.py
'''
    Program to generate valid time allocations for the staff of a mental ward.


    Given:

    Two staff lists. Each list applies to a specific window of time. The two
    lists may overlap, i.e. the same employee may appear on both.

    Each employee has a designation as RMN or HCA.

## fitWeightedNegativeBinomial.R
# Fit using a simple EM algorithm
# observations are x
# weights are w (must be same length as x)
# returns (r, p)
# r - dispersion parameter
# p - probability of success
weightedNegBinomFit <- function(x, w, maxsteps=30)
{
    sum.wx = sum(x*w)
    sum.w = sum(w)
	import numpy as np
	import numpy.ma as ma


	def cap_outliers(points, thresh=3.5, data=None, median=None, med_abs_deviation=None):
	'''
	Cap outliers to be within a certain number of median deviations.
	'''
	if type(points) is np.float64:
	points = np.array([points])
	import numpy as np

	def pav(y):
	"""
	PAV uses the pair adjacent violators method to produce a monotonic
	smoothing of y

	translated from matlab by Sean Collins (2006) as part of the EMAP toolbox
	Author : Alexandre Gramfort
	license : BSD
	import matplotlib.pyplot as plt
	import numpy as np
	import statsmodels.api as sm


	def generalized_liang_sim_xy(N=500, P=500, S=100):
	'''Generates data from a simple linear model'''
	X = (np.random.normal(size=(N,1)) + np.random.normal(size=(N,P))) / 2.
	w0 = np.random.normal(1, size=S//4)
	w1 = np.random.normal(2, size=S//4)
	import numpy as np
	from functools import partial
	from scipy.optimize import fmin_l_bfgs_b
	from sklearn.linear_model import LogisticRegression


	def binary_mf(Y, nembeds, lam=None, lams=30, cv=5, max_steps=30, tol=1e-4, verbose=False):
	# Convert to a log-space grid
	if lam is None and isinstance(lams, int):
	lams = np.exp(np.linspace(np.log(1e-2), np.log(1), lams))
	'''
	An O(n log n) time implementation of the knockoff filter.

	Author: Wesley Tansey
	Date: 3/27/2020
	'''
	import numpy as np

	def knockoff_filter(knockoff_stats, alpha, offset=1.0, is_sorted=False):
	'''Perform the knockoffs selection procedure at the target FDR threshold.
	'''
	Heterogeneous factor modeling.

	This model fits a heterogeneous factor model where columns may be:
	1) Binary
	2) Categorical
	3) Gaussian

	Everything is fit via alternating minimization and stochastic gradient descent.
	The code relies on pytorch for SGD and a demo is included.
	'''Pool adjacent violators algorithm for (column-)monotone matrix factorization.

	Applies the PAV algorithm to column factors of a matrix factorization:
	Given: M = W.V'
	Returns: V_proj, a projected version of V such that M[i] is monotone decreasing
	for all i.

	Author: Wesley Tansey
	Date: May 2019
	'''
	'''Fast sampling from a multivariate normal with covariance or precision
	parameterization. Supports sparse arrays. Params:
	- mu: If provided, assumes the model is N(mu, Q)
	- mu_part: If provided, assumes the model is N(Q mu_part, Q).
	This is common in many conjugate Gibbs steps.
	- sparse: If true, assumes we are working with a sparse Q
	- precision: If true, assumes Q is a precision matrix (inverse covariance)
	- chol_factor: If true, assumes Q is a (lower triangular) Cholesky
	decomposition of the covariance matrix
	(or of the precision matrix if precision=True).
	'''
	Program to generate valid time allocations for the staff of a mental ward.


	Given:

	Two staff lists. Each list applies to a specific window of time. The two
	lists may overlap, i.e. the same employee may appear on both.

	Each employee has a designation as RMN or HCA.
	# Fit using a simple EM algorithm
	# observations are x
	# weights are w (must be same length as x)
	# returns (r, p)
	# r - dispersion parameter
	# p - probability of success
	weightedNegBinomFit <- function(x, w, maxsteps=30)
	{
	sum.wx = sum(x*w)
	sum.w = sum(w)