Luca Massaron (lmassaron)
🦉 when the shadows gather, the owl of Minerva takes flight
@lmassaron
lmassaron / personal-assistant-for-knowledge-management-based-on-gemini-on-vertex-ai.ipynb
Last active February 26, 2025 18:07
Personal Assistant for knowledge management based on Gemini on Vertex AI.ipynb
(Notebook preview not available in this listing.)
import math

class RunningStats:
    """Welford's online algorithm for a running mean and variance."""
    def __init__(self):
        self.count = 0
        self.mean = 0.0
        self.M2 = 0.0
    def update(self, value):
        # Standard Welford update of the running mean and sum of squares
        self.count += 1
        delta = value - self.mean
        self.mean += delta / self.count
        self.M2 += delta * (value - self.mean)
    def std(self):
        # Sample standard deviation; needs at least two observations
        return math.sqrt(self.M2 / (self.count - 1)) if self.count > 1 else 0.0
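
A quick check of the class above, with arbitrary values:

stats = RunningStats()
for x in [2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0]:
    stats.update(x)
print(stats.mean, stats.std())  # 5.0 and roughly 2.14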
@lmassaron
lmassaron / decorator_example
Created May 15, 2023 06:31
An example of how to build a decorator
import functools

def decorator(func_to_decorate):
    """
    @functools.wraps(func) updates .__name__ and .__doc__ so that
    code completion works in editors and you can pull up documentation.
    """
    @functools.wraps(func_to_decorate)
    def wrapper(*args, **kwargs):
        # Do something before the wrapped call
        result = func_to_decorate(*args, **kwargs)
        # Do something after, then hand back the result
        return result
    return wrapper
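
A minimal usage sketch of the decorator above (greet is a made-up example function):

@decorator
def greet(name):
    """Return a friendly greeting."""
    return f"Hello, {name}!"

print(greet("Ada"))    # Hello, Ada!
print(greet.__name__)  # 'greet', preserved by functools.wraps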
@lmassaron
lmassaron / theils_u
Created March 19, 2023 12:30
Theil's U & Cramer's V
import numpy as np
import pandas as pd
import scipy.stats as ss

def cramers_v(x, y):
    """Bias-corrected Cramer's V between two categorical series."""
    confusion_matrix = pd.crosstab(x, y)
    chi2 = ss.chi2_contingency(confusion_matrix)[0]
    n = confusion_matrix.sum().sum()
    phi2 = chi2 / n
    r, k = confusion_matrix.shape
    phi2corr = max(0, phi2 - ((k - 1) * (r - 1)) / (n - 1))
    rcorr = r - ((r - 1) ** 2) / (n - 1)
    kcorr = k - ((k - 1) ** 2) / (n - 1)
    return np.sqrt(phi2corr / min((kcorr - 1), (rcorr - 1)))
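
The gist title also promises Theil's U, but the listing truncates before it. Below is a minimal sketch of the usual uncertainty coefficient U(x|y), my reconstruction rather than the gist's own code:

import math
from collections import Counter
import scipy.stats as ss

def conditional_entropy(x, y):
    # H(X|Y), computed from empirical joint and marginal frequencies
    y_counter = Counter(y)
    xy_counter = Counter(zip(x, y))
    total = sum(y_counter.values())
    h = 0.0
    for (x_val, y_val), n_xy in xy_counter.items():
        p_xy = n_xy / total
        p_y = y_counter[y_val] / total
        h += p_xy * math.log(p_y / p_xy)
    return h

def theils_u(x, y):
    # U(x|y) = (H(X) - H(X|Y)) / H(X), in [0, 1]; asymmetric in x and y
    h_x = ss.entropy(list(Counter(x).values()))
    return 1.0 if h_x == 0 else (h_x - conditional_entropy(x, y)) / h_x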
@lmassaron
lmassaron / gist:700cad521198798cfb901ba9a711ae80
Created October 27, 2022 09:36
Fixed effects in linear regression
https://lost-stats.github.io/Model_Estimation/OLS/fixed_effects_in_linear_regression.html
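
The linked page catalogs several ways to estimate fixed effects; as one illustration, here is a minimal sketch of the within (entity-demeaning) estimator on a made-up toy panel, not code from the gist itself:

import numpy as np
import pandas as pd
import statsmodels.api as sm

# Toy panel: outcome y, regressor x, entity identifier "firm" (made-up names)
rng = np.random.default_rng(0)
df = pd.DataFrame({"firm": np.repeat(["a", "b", "c"], 50),
                   "x": rng.normal(size=150)})
firm_effect = df["firm"].map({"a": 1.0, "b": -2.0, "c": 0.5})
df["y"] = 3.0 * df["x"] + firm_effect + rng.normal(size=150)

# Within transformation: demean y and x by entity, then run plain OLS
demeaned = df[["y", "x"]] - df.groupby("firm")[["y", "x"]].transform("mean")
ols = sm.OLS(demeaned["y"], demeaned["x"]).fit()
print(ols.params)  # slope close to the true 3.0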
Dealing with zeros and ones in a beta regression
------------------------------------------------
Smithson, M. & Verkuilen, J.
A better lemon squeezer? Maximum-likelihood regression with beta-distributed dependent variables.
Psychol. Methods 11, 54–71 (2006).
DOI: 10.1037/1082-989X.11.1.54
https://stats.stackexchange.com/questions/31300/dealing-with-0-1-values-in-a-beta-regression

One option is a zero-one inflated beta regression, which adds point masses for the exact zeros and ones. A quicker workaround is to nudge the boundary values just inside the open interval (0, 1) before fitting:
import numpy as np
from scipy.stats import beta

data = np.array([0.0, 0.0, 0.1, 0.1, 0.2, 0.4, 0.5, 0.7, 0.8, 0.8, 0.9, 1.0, 1.0, 1.0])
eps = 0.000001
data[data == 0.0] += eps  # move exact zeros just inside the support
data[data == 1.0] -= eps  # move exact ones just inside the support
a, b, loc, scale = beta.fit(data, floc=0, fscale=1)
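
The Smithson & Verkuilen paper cited above instead rescales the whole sample with y' = (y * (n - 1) + 0.5) / n rather than nudging only the boundary cases; a minimal sketch of that alternative, reusing data and beta from the snippet above:

def squeeze(y):
    # Smithson & Verkuilen (2006) transformation: maps [0, 1] into (0, 1)
    n = len(y)
    return (y * (n - 1) + 0.5) / n

a2, b2, loc2, scale2 = beta.fit(squeeze(data), floc=0, fscale=1)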
import tensorflow as tf

def poly1_cross_entropy(logits, labels, epsilon=1.0):
    # pt, CE, and Poly1 have shape [batch].
    pt = tf.reduce_sum(labels * tf.nn.softmax(logits), axis=-1)
    CE = tf.nn.softmax_cross_entropy_with_logits(labels, logits)
    Poly1 = CE + epsilon * (1 - pt)
    return Poly1

def poly1_focal_loss(logits, labels, epsilon=1.0, gamma=2.0):
    # p, pt, FL, and Poly1 have shape [batch, num of classes].
    p = tf.math.sigmoid(logits)
    # Completed past the truncation, following the PolyLoss paper (Leng et al., 2022)
    pt = labels * p + (1 - labels) * (1 - p)
    FL = -tf.math.pow(1 - pt, gamma) * tf.math.log(pt)
    Poly1 = FL + epsilon * tf.math.pow(1 - pt, gamma + 1)
    return Poly1
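
A quick shape check with random tensors (batch of 4, 3 classes, arbitrary values):

logits = tf.random.normal((4, 3))
labels = tf.one_hot([0, 2, 1, 2], depth=3)
print(poly1_cross_entropy(logits, labels).shape)  # (4,)
print(poly1_focal_loss(logits, labels).shape)     # (4, 3)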
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier

class ClassifierTransformer(BaseEstimator, TransformerMixin):
    """
    Classifier's estimates of a regression problem using oof
    (out-of-fold) predictions as features.
    """
    def __init__(self, estimator=None, n_classes=2, cv=3):
        self.estimator = estimator
        self.n_classes = n_classes
        self.cv = cv
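
The listing truncates the class at this point. Here is a minimal sketch of how fit and transform could round it out, binning the continuous target into quantile classes and exposing out-of-fold probabilities; the binning scheme and attribute names are my assumptions, not the gist's:

import numpy as np
from sklearn.base import clone
from sklearn.model_selection import cross_val_predict

class ClassifierTransformerSketch(ClassifierTransformer):
    def fit(self, X, y):
        # Bin the continuous target into n_classes quantile classes
        edges = np.percentile(y, np.linspace(0, 100, self.n_classes + 1)[1:-1])
        y_cls = np.digitize(y, edges)
        self.estimator_ = clone(self.estimator or RandomForestClassifier())
        # Out-of-fold probability estimates for the training rows
        self.oof_ = cross_val_predict(self.estimator_, X, y_cls,
                                      cv=self.cv, method="predict_proba")
        self.estimator_.fit(X, y_cls)
        return self

    def transform(self, X):
        return self.estimator_.predict_proba(X)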
@lmassaron
lmassaron / gist:ee6f926e2fa3eb1fe204e47e1ae60c88
Last active September 5, 2021 07:04
Reduce memory usage of a pandas DataFrame
# Derived from the original script https://www.kaggle.com/gemartin/load-data-reduce-memory-usage
# by Guillaume Martin
import numpy as np

def reduce_mem_usage(df, verbose=True):
    numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
    start_mem = df.memory_usage().sum() / 1024**2
    for col in df.columns:
        col_type = df[col].dtypes
        if col_type in numerics:
            c_min = df[col].min()
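            c_max = df[col].max()
            # The listing truncates above; the rest reconstructs Guillaume
            # Martin's original Kaggle script: downcast each numeric column
            # to the narrowest dtype that can hold its observed range.
            if str(col_type)[:3] == 'int':
                if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
                    df[col] = df[col].astype(np.int8)
                elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
                    df[col] = df[col].astype(np.int16)
                elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
                    df[col] = df[col].astype(np.int32)
                else:
                    df[col] = df[col].astype(np.int64)
            else:
                if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
                    df[col] = df[col].astype(np.float16)
                elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
                    df[col] = df[col].astype(np.float32)
                else:
                    df[col] = df[col].astype(np.float64)
    end_mem = df.memory_usage().sum() / 1024**2
    if verbose:
        print(f"Mem. usage decreased to {end_mem:5.2f} Mb "
              f"({100 * (start_mem - end_mem) / start_mem:.1f}% reduction)")
    return df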