Luca Massaron lmassaron

## personal-assistant-for-knowledge-management-based-on-gemini-on-vertex-ai.ipynb

      
              1 file
            
          
              0 forks
            
          
                0 comments
              
            
              0 stars
            
          
                lmassaron
                / personal-assistant-for-knowledge-management-based-on-gemini-on-vertex-ai.ipynb
            
            
              Last active
              February 26, 2025 18:07
            
              
                Personal Assistant for knowledge management based on Gemini on Vertex AI.ipynb
              
          
      Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## gist:a01430584d001b8513661d9b51fe9527
import math

class RunningStats:
    """Accumulator that is fed one value at a time through ``update``.

    NOTE(review): only the beginning of the class is visible in this
    excerpt. The ``count`` / ``mean`` / ``M2`` fields look like the state of
    a Welford-style running mean/variance -- confirm against the full source.
    """
    def __init__(self):
        # Every accumulator starts from zero.
        self.count = 0
        self.mean = 0.0
        self.M2 = 0.0

    def update(self, value):
        """Register one more observation.

        NOTE(review): the visible lines only increment ``count``; ``value``
        is not used within this excerpt.
        """
        self.count += 1

## decorator_example
import functools
def decorator(func_to_decorate):
	"""
	Template for a decorator that wraps ``func_to_decorate``.

	``@functools.wraps(func_to_decorate)`` copies ``__name__`` and
	``__doc__`` from the wrapped function onto ``wrapper`` so that code
	completion works in editors and you can pull up documentation.
	"""
	@functools.wraps(func_to_decorate)
	def wrapper(*args, **kwargs):
		# Do something
		# NOTE(review): the wrapper body is not visible in this excerpt.

## theils_u
def cramers_v(x, y):
    """Return the bias-corrected Cramer's V association of two categoricals.

    The statistic is derived from the chi-squared value of the ``x`` by ``y``
    contingency table. Both phi-squared and the table dimensions are
    corrected with the ``(n - 1)`` terms before the ratio is taken.

    Args:
        x: Categorical values used as the rows of ``pd.crosstab``.
        y: Categorical values used as the columns of ``pd.crosstab``.

    Returns:
        The corrected Cramer's V. ``0.0`` is returned when the statistic is
        undefined (0/0): a single observation, or a corrected table
        dimension of 1 (for example a variable with one observed category).
    """
    confusion_matrix = pd.crosstab(x, y)
    chi2 = ss.chi2_contingency(confusion_matrix)[0]
    n = confusion_matrix.sum().sum()
    if n <= 1:
        # Every correction term below divides by (n - 1).
        return 0.0
    phi2 = chi2 / n
    r, k = confusion_matrix.shape
    phi2corr = max(0, phi2 - ((k - 1) * (r - 1)) / (n - 1))
    rcorr = r - ((r - 1) ** 2) / (n - 1)
    kcorr = k - ((k - 1) ** 2) / (n - 1)
    denominator = min(kcorr - 1, rcorr - 1)
    if denominator <= 0:
        # Without this guard the ratio is 0/0 and the result is NaN.
        return 0.0
    return np.sqrt(phi2corr / denominator)

## gist:700cad521198798cfb901ba9a711ae80
https://lost-stats.github.io/Model_Estimation/OLS/fixed_effects_in_linear_regression.html

## gist:493384b4d84e941860b766069fa1101e
dealing with zeros and ones in a beta regression
------------------------------------------------
Smithson, M. & Verkuilen, J.
A better lemon squeezer? Maximum-likelihood regression with beta-distributed dependent variables.
Psychol. Methods 11, 54–71 (2006).
DOI: 10.1037/1082-989X.11.1.54

https://stats.stackexchange.com/questions/31300/dealing-with-0-1-values-in-a-beta-regression

zero-one inflated beta regression

## gist:08022e925ae1f40d2e39bac6703a881e
from scipy.stats import beta, norm
import numpy as np

# Sample to fit; it contains exact 0.0 and 1.0 entries.
data = np.array([0.0, 0.0, 0.1, 0.1, 0.2, 0.4, 0.5, 0.7, 0.8, 0.8, 0.9, 1.0, 1.0, 1.0])
eps = 0.000001

# Shift exact zeros up by eps and exact ones down by eps; every other
# entry is left as it is.
data = np.where(data == 0.0, data + eps, data)
data = np.where(data == 1.0, data - eps, data)

# Fit the two shape parameters with loc held at 0 and scale held at 1.
a, b, loc, scale = beta.fit(data, floc=0, fscale=1)

## polyloss
def poly1_cross_entropy(logits, labels, epsilon=1.0):
    """Poly-1 cross-entropy: softmax cross-entropy plus ``epsilon * (1 - pt)``.

    Args:
        logits: Class scores; the softmax output is reduced over the last
            axis.
        labels: Target weights multiplied element-wise with the softmax
            output (assumed one-hot or soft labels shaped like ``logits``
            -- TODO confirm with callers).
        epsilon: Weight of the added ``(1 - pt)`` term.

    Returns:
        The per-example loss; the original comment gives the shape of pt,
        CE and Poly1 as [batch].
    """
    probabilities = tf.nn.softmax(logits)
    # Label-weighted probability, summed over the last axis.
    pt = tf.reduce_sum(labels * probabilities, axis=-1)
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels, logits)
    return cross_entropy + epsilon * (1 - pt)

def poly1_focal_loss(logits, labels, epsilon=1.0, gamma=2.0):
    """Poly-1 variant of the focal loss (going by the function name).

    NOTE(review): only the first statement is visible in this excerpt, so
    how ``labels``, ``epsilon`` and ``gamma`` are used and what is returned
    cannot be confirmed from here.
    """
    # p, pt, FL, and Poly1 have shape [batch, num of classes].
    # Element-wise sigmoid of the logits.
    p = tf.math.sigmoid(logits)

## gist:f4c00689ba2bab53c1fd7b5b63730a34
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier

class ClassifierTransformer(BaseEstimator, TransformerMixin):
    """
    Transformer built around a classifier's estimates of a regression
    problem, using oof (presumably out-of-fold) predictions.

    NOTE(review): only the start of ``__init__`` is visible in this excerpt.
    """
    def __init__(self, estimator=None, n_classes=2, cv=3):
        # Constructor arguments are stored unchanged on the instance.
        # NOTE(review): `cv` is not stored in the visible lines -- its
        # assignment is presumably in the part that is cut off; confirm.
        self.estimator = estimator
        self.n_classes = n_classes

## gist:ee6f926e2fa3eb1fe204e47e1ae60c88
# Derived from the original script https://www.kaggle.com/gemartin/load-data-reduce-memory-usage
# by Guillaume Martin

def reduce_mem_usage(df, verbose=True):
    """Walk the numeric columns of ``df`` (start of a memory-reduction pass).

    NOTE(review): the function is cut off after the first statement of the
    column loop; how ``verbose`` is used and what is returned are not
    visible in this excerpt.
    """
    # dtype names that the membership check below treats as numeric.
    numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
    # Memory footprint before any change, divided by 1024**2 (bytes -> MiB).
    start_mem = df.memory_usage().sum() / 1024**2
    for col in df.columns:
        col_type = df[col].dtypes
        if col_type in numerics:
            # Smallest value currently stored in the column.
            c_min = df[col].min()
	import math

	class RunningStats:
	def __init__(self):
	self.count = 0
	self.mean = 0.0
	self.M2 = 0.0

	def update(self, value):
	self.count += 1
	import functools
	def decorator(func_to_decorate):
	"""
	@wraps
	(func) updates .__name__ and .__doc__ so that code completion
	works in editors and you can pull up documentation.
	"""
	@functools.wraps(func_to_decorate)
	def wrapper(args, *kwargs):
	# Do something
	def cramers_v(x, y):
	    """Return the bias-corrected Cramer's V association of two categoricals.

	    The statistic is derived from the chi-squared value of the ``x`` by
	    ``y`` contingency table. Both phi-squared and the table dimensions are
	    corrected with the ``(n - 1)`` terms before the ratio is taken.

	    Args:
	        x: Categorical values used as the rows of ``pd.crosstab``.
	        y: Categorical values used as the columns of ``pd.crosstab``.

	    Returns:
	        The corrected Cramer's V. ``0.0`` is returned when the statistic
	        is undefined (0/0): a single observation, or a corrected table
	        dimension of 1 (for example a variable with one observed category).
	    """
	    confusion_matrix = pd.crosstab(x, y)
	    chi2 = ss.chi2_contingency(confusion_matrix)[0]
	    n = confusion_matrix.sum().sum()
	    if n <= 1:
	        # Every correction term below divides by (n - 1).
	        return 0.0
	    phi2 = chi2 / n
	    r, k = confusion_matrix.shape
	    phi2corr = max(0, phi2 - ((k - 1) * (r - 1)) / (n - 1))
	    rcorr = r - ((r - 1) ** 2) / (n - 1)
	    kcorr = k - ((k - 1) ** 2) / (n - 1)
	    denominator = min(kcorr - 1, rcorr - 1)
	    if denominator <= 0:
	        # Without this guard the ratio is 0/0 and the result is NaN.
	        return 0.0
	    return np.sqrt(phi2corr / denominator)
	dealing with zeros and ones in a beta regression
	------------------------------------------------
	Smithson, M. & Verkuilen, J.
	A better lemon squeezer? Maximum-likelihood regression with beta-distributed dependent variables.
	Psychol. Methods 11, 54–71 (2006).
	DOI: 10.1037/1082-989X.11.1.54

	https://stats.stackexchange.com/questions/31300/dealing-with-0-1-values-in-a-beta-regression

	zero-one inflated beta regression
	from scipy.stats import beta, norm
	import numpy as np

	# Sample to fit; it contains exact 0.0 and 1.0 entries.
	data = np.array([0.0, 0.0, 0.1, 0.1, 0.2, 0.4, 0.5, 0.7, 0.8, 0.8, 0.9, 1.0, 1.0, 1.0])
	eps = 0.000001

	# Shift exact zeros up by eps and exact ones down by eps; every other
	# entry is left as it is.
	data = np.where(data == 0.0, data + eps, data)
	data = np.where(data == 1.0, data - eps, data)

	# Fit the two shape parameters with loc held at 0 and scale held at 1.
	a, b, loc, scale = beta.fit(data, floc=0, fscale=1)
	def poly1_cross_entropy(logits, labels, epsilon=1.0):
	    """Poly-1 cross-entropy: softmax cross-entropy plus ``epsilon * (1 - pt)``.

	    Args:
	        logits: Class scores; the softmax output is reduced over the last
	            axis.
	        labels: Target weights multiplied element-wise with the softmax
	            output (assumed one-hot or soft labels shaped like ``logits``
	            -- TODO confirm with callers).
	        epsilon: Weight of the added ``(1 - pt)`` term.

	    Returns:
	        The per-example loss; the original comment gives the shape of pt,
	        CE and Poly1 as [batch].
	    """
	    # pt, CE, and Poly1 have shape [batch].
	    pt = tf.reduce_sum(labels * tf.nn.softmax(logits), axis=-1)
	    CE = tf.nn.softmax_cross_entropy_with_logits(labels, logits)
	    Poly1 = CE + epsilon * (1 - pt)
	    return Poly1

	def poly1_focal_loss(logits, labels, epsilon=1.0, gamma=2.0):
	# p, pt, FL, and Poly1 have shape [batch, num of classes].
	p = tf.math.sigmoid(logits)
	from sklearn.base import BaseEstimator, TransformerMixin
	from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier

	class ClassifierTransformer(BaseEstimator, TransformerMixin):
	"""
	Classifier's estimates of a regression problem using oof
	"""
	def __init__(self, estimator=None, n_classes=2, cv=3):
	self.estimator = estimator
	self.n_classes = n_classes
	# Derived from the original script https://www.kaggle.com/gemartin/load-data-reduce-memory-usage
	# by Guillaume Martin

	def reduce_mem_usage(df, verbose=True):
	numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
	start_mem = df.memory_usage().sum() / 1024**2
	for col in df.columns:
	col_type = df[col].dtypes
	if col_type in numerics:
	c_min = df[col].min()