Skip to content

Instantly share code, notes, and snippets.

View tansey's full-sized avatar

Wesley Tansey tansey

View GitHub Profile
@tansey
tansey / binary_matrix_factorization.py
Last active June 1, 2020 21:55
Quick and dirty binary matrix factorization via alternating logistic regression
import numpy as np
from functools import partial
from scipy.optimize import fmin_l_bfgs_b
from sklearn.linear_model import LogisticRegression
def binary_mf(Y, nembeds, lam=None, lams=30, cv=5, max_steps=30, tol=1e-4, verbose=False):
# Convert to a log-space grid
if lam is None and isinstance(lams, int):
lams = np.exp(np.linspace(np.log(1e-2), np.log(1), lams))
@tansey
tansey / aicc_select.py
Created July 6, 2021 01:10
Post-selection inference example for AICc-based model screening
import matplotlib.pyplot as plt
import numpy as np
import statsmodels.api as sm
def generalized_liang_sim_xy(N=500, P=500, S=100):
'''Generates data from a simple linear model'''
X = (np.random.normal(size=(N,1)) + np.random.normal(size=(N,P))) / 2.
w0 = np.random.normal(1, size=S//4)
w1 = np.random.normal(2, size=S//4)
@tansey
tansey / pav.py
Created April 5, 2022 16:26
1D and 2D pool adjacent violators (PAV)
import numpy as np
def pav(y):
"""
PAV uses the pool adjacent violators method to produce a monotonic
smoothing of y
translated from matlab by Sean Collins (2006) as part of the EMAP toolbox
Author : Alexandre Gramfort
license : BSD
@tansey
tansey / median_deviations.py
Created July 13, 2022 11:02
Code to cap outlier values at a given number of median absolute deviations
import numpy as np
import numpy.ma as ma
def cap_outliers(points, thresh=3.5, data=None, median=None, med_abs_deviation=None):
'''
Cap outliers to be within a certain number of median deviations.
'''
if type(points) is np.float64:
points = np.array([points])