Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import math | |
class RunningStats: | |
def __init__(self): | |
self.count = 0 | |
self.mean = 0.0 | |
self.M2 = 0.0 | |
def update(self, value): | |
self.count += 1 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import functools | |
def decorator(func_to_decorate): | |
""" | |
@wraps | |
(func) updates .__name__ and .__doc__ so that code completion | |
works in editors and you can pull up documentation. | |
""" | |
@functools.wraps(func_to_decorate) | |
def wrapper(*args, **kwargs): | |
# Do something |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def cramers_v(x, y):
    """Return a bias-corrected Cramér's V association between two categoricals.

    The contingency table of ``x`` against ``y`` is built with
    ``pd.crosstab``; its chi-squared statistic is converted to phi-squared
    and then shrunk by the small-sample correction terms computed below.

    Args:
        x: Categorical observations (anything ``pd.crosstab`` accepts as
            ``index``).
        y: Categorical observations of the same length as ``x`` (passed to
            ``pd.crosstab`` as ``columns``).

    Returns:
        The corrected Cramér's V as a float, or NaN when the statistic is
        undefined: fewer than two observations, or a corrected table
        dimension of 1 (e.g. one variable has a single observed level).

    NOTE(review): ``chi2_contingency`` is called with its defaults, so SciPy's
    continuity correction applies to 2x2 tables — confirm that is intended.
    """
    confusion_matrix = pd.crosstab(x, y)
    chi2 = ss.chi2_contingency(confusion_matrix)[0]
    n = confusion_matrix.sum().sum()
    r, k = confusion_matrix.shape

    # Every correction term divides by (n - 1); with a single observation
    # the statistic is undefined, so report NaN instead of dividing by zero.
    if n <= 1:
        return np.nan

    phi2 = chi2 / n
    # Clamp at zero: the correction can push a small phi2 below zero.
    phi2corr = max(0, phi2 - ((k - 1) * (r - 1)) / (n - 1))
    rcorr = r - ((r - 1) ** 2) / (n - 1)
    kcorr = k - ((k - 1) ** 2) / (n - 1)

    denominator = min(kcorr - 1, rcorr - 1)
    # A single-level variable (or one level per observation) collapses the
    # corrected dimension to 1, leaving a 0/0 form; return NaN explicitly
    # rather than relying on a NumPy divide warning.
    if denominator <= 0:
        return np.nan

    return np.sqrt(phi2corr / denominator)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
https://lost-stats.github.io/Model_Estimation/OLS/fixed_effects_in_linear_regression.html |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Dealing with zeros and ones in a beta regression
------------------------------------------------
Smithson, M. & Verkuilen, J. (2006).
A better lemon squeezer? Maximum-likelihood regression with beta-distributed dependent variables.
Psychological Methods, 11(1), 54–71.
DOI: 10.1037/1082-989X.11.1.54
See also: https://stats.stackexchange.com/questions/31300/dealing-with-0-1-values-in-a-beta-regression
zero-one inflated beta regression |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from scipy.stats import beta, norm | |
import numpy as np | |
# Sample proportions; note that exact 0.0 and 1.0 values are present.
data = np.array([0.0, 0.0, 0.1, 0.1, 0.2, 0.4, 0.5, 0.7, 0.8, 0.8, 0.9, 1.0, 1.0, 1.0])

# Margin used to pull boundary observations strictly inside (0, 1).
eps = 0.000001

# Clip into [eps, 1 - eps] so no observation sits on the boundary of the
# fixed [0, 1] support used in the fit below. A single clip also covers
# values that are merely very close to 0 or 1, which an exact `==`
# comparison would miss.
data = np.clip(data, eps, 1.0 - eps)

# Fit only the two shape parameters; location and scale are pinned to the
# standard unit interval.
a, b, loc, scale = beta.fit(data, floc=0, fscale=1)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def poly1_cross_entropy(logits, labels, epsilon=1.0):
    """Return softmax cross-entropy plus a first-order polynomial term.

    Computes ``CE + epsilon * (1 - pt)``, where ``pt`` is the sum over the
    last axis of ``labels * softmax(logits)``.

    Args:
        logits: Unnormalised class scores.
        labels: Target distribution with the same shape as ``logits``
            (presumably one-hot, so ``pt`` is the probability assigned to
            the true class — TODO confirm with callers).
        epsilon: Weight of the ``(1 - pt)`` term; defaults to 1.0.

    Returns:
        The per-example loss; the last (class) axis is reduced away.
    """
    # pt, CE, and Poly1 have shape [batch].
    pt = tf.reduce_sum(labels * tf.nn.softmax(logits), axis=-1)
    # Keyword arguments make the labels/logits order explicit, so the two
    # tensors cannot be swapped silently.
    CE = tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits)
    Poly1 = CE + epsilon * (1 - pt)
    return Poly1
def poly1_focal_loss(logits, labels, epsilon=1.0, gamma=2.0): | |
# p, pt, FL, and Poly1 have shape [batch, num of classes]. | |
p = tf.math.sigmoid(logits) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.base import BaseEstimator, TransformerMixin | |
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier | |
class ClassifierTransformer(BaseEstimator, TransformerMixin): | |
""" | |
Classifier's estimates of a regression problem using oof | |
""" | |
def __init__(self, estimator=None, n_classes=2, cv=3): | |
self.estimator = estimator | |
self.n_classes = n_classes |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Derived from the original script https://www.kaggle.com/gemartin/load-data-reduce-memory-usage | |
# by Guillaume Martin | |
def reduce_mem_usage(df, verbose=True): | |
numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64'] | |
start_mem = df.memory_usage().sum() / 1024**2 | |
for col in df.columns: | |
col_type = df[col].dtypes | |
if col_type in numerics: | |
c_min = df[col].min() |
NewerOlder