Juan Quintana jmquintana79

## function_transformer.py
import pandas as pd
from sklearn.preprocessing import FunctionTransformer
from sklearn.pipeline import Pipeline
# example
from sklearn.linear_model import LogisticRegression

# X, y

def get_dummies_size(df):
    """
    One-hot encode the ``size`` column of a dataframe.
    df -- Dataframe to be processed; it must contain a ``size`` column.
    return -- New dataframe where ``size`` is replaced by ``size_<value>`` indicator columns.
    """
    dummy_columns = ['size']
    encoded = pd.get_dummies(df, columns=dummy_columns)
    return encoded

## pipeline_template_scikit.py
import pandas as pd
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
# example models and preprocessors
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression

# X, y

## config_experiments.md

      
              2 files
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                jmquintana79
                / config_experiments.md
            
            
              Last active
              July 6, 2024 21:15
            
          
Reference: https://hydra.cc/docs/patterns/configuring_experiments/

config.yaml

defaults:
  - db: mysql
  - server: apache

db/mysql.yaml


## herencia1.py
class Credentials:
    """Container for a user name and a password, both set to fixed placeholder values."""

    def __init__(self):
        # Both attributes are assigned at once through tuple unpacking.
        self.user, self.password = "user", "password"


class Service(Credentials):
    """Service that obtains its ``user`` and ``password`` attributes from Credentials."""

    def __init__(self):
        # Explicit two-argument super(); it resolves to the same parent initializer.
        super(Service, self).__init__()

## stack_according_to_a_reference.py
import pandas as pd

## unstack a timeseries target variable according to a categorical reference column
def unstack_ts_according_to_reference(df:pd.DataFrame, c_dt:str, c_cat_reference:str, c_target_variable:str)->pd.DataFrame:
    """
    Unstack a timeseries target variable according to a categorical reference column.
    df -- Dataframe to be processed.
    c_dt -- Temporal column.
    c_cat_reference -- Categorical column to be used as reference to stack the target variable.
    c_target_variable -- Num / Cat column to be stacked.

## regression.py
from scipy.stats import linregress
# Estimate the linear regression y = A*x + B.
# linregress returns (slope, intercept, rvalue, pvalue, stderr), so A is the
# slope and B is the intercept. `x` and `y` are not defined in this snippet and
# must already exist as the two sets of measurements.
A, B, r_value, p_value, std_err = linregress(x, y)

## timdelta_comps.py
# original column
In [15]: df["timedelta_column"]
Out[15]:
0    1 days 00:00:00
1    3 days 02:00:00
2    5 days 04:00:00
3    7 days 06:00:00
4    9 days 08:00:00
5   11 days 10:00:00
dtype: timedelta64[ns]

## get_extension.py
import os
# splitext returns a (root, ext) pair; index 1 is the extension including the
# leading dot (e.g. ".txt"), or "" when the name has no extension.
# `filename` is not defined in this snippet and must already exist.
extension = os.path.splitext(filename)[1]

## validate_exitst.py
import os

if os.path.isfile("filename.txt"):
    # The path exists and is a regular file, so it can be opened.
    # NOTE(review): the handle is never closed in this snippet; consider a `with` block.
    f = open("filename.txt")

if os.path.isdir("data"):
    # directory exists

if os.path.exists(file_path):

## angle_aggregation.py
import numpy as np

## angle format: 0/360 to -180/180
def angles_format(angle_0_360:np.ndarray)->np.ndarray:
    """
    Convert angles from the 0/360 convention to the -180/180 convention.
    angle_0_360 -- Angles in degrees, expected in [0, 360). Any array-like of any shape is accepted.
    return -- Array of the same shape where values >= 180 are shifted by -360, giving [-180, 180).
    """
    angles = np.asarray(angle_0_360)
    # Vectorized elementwise selection: no Python-level loop, and n-D input is supported.
    return np.where(angles >= 180, angles - 360, angles)

## aggregation for angular data
def angles_agg(angle_0_360:np.array, func_agg) -> float:
    """
    Calculate wind direction average.
	import pandas as pd
	from sklearn.preprocessing import FunctionTransformer
	from sklearn.pipeline import Pipeline
	# example
	from sklearn.linear_model import LogisticRegression

	# X, y

	def get_dummies_size(df):
	return pd.get_dummies(df, columns=['size'])
	import pandas as pd
	import numpy as np
	from sklearn.pipeline import Pipeline
	from sklearn.compose import ColumnTransformer
	# example models and preprocessors
	from sklearn.preprocessing import StandardScaler, OneHotEncoder
	from sklearn.impute import SimpleImputer
	from sklearn.linear_model import LogisticRegression

	# X, y
	class Credentials():

	def __init__(self):
	self.user = "user"
	self.password = "password"


	class Service(Credentials):
	def __init__(self):
	super().__init__()
	import pandas as pd

	## unstack a timeseries target variable according to a categorical reference column
	def unstack_ts_according_to_reference(df:pd.DataFrame, c_dt:str, c_cat_reference:str, c_target_variable:str)->pd.DataFrame:
	"""
	Unstack a timeseries target variable according to a categorical reference column.
	df -- Dataframe to be processed.
	c_dt -- Temporal column.
	c_cat_reference -- Categorical column to be used as reference to stack the target variable.
	c_target_variable -- Num / Cat column to be stacked.
	from scipy.stats import linregress
	# estimate linear regression y = Ax + B
	A, B, r_value, p_value, std_err = linregress(x, y)
	# original column
	In [15]: df["timedelta_column"]
	Out[15]:
	0 1 days 00:00:00
	1 3 days 02:00:00
	2 5 days 04:00:00
	3 7 days 06:00:00
	4 9 days 08:00:00
	5 11 days 10:00:00
	dtype: timedelta64[ns]
	import os

	if os.path.isfile("filename.txt"):
	# file exists
	f = open("filename.txt")

	if os.path.isdir("data"):
	# directory exists

	if os.path.exists(file_path):
	import numpy as np

	## angle format: 0/360 to -180/180
	def angles_format(angle_0_360:np.array)->np.array:
	return np.array([v-360 if v>=180 else v for v in angle_0_360])

	## aggregation for angular data
	def angles_agg(angle_0_360:np.array, func_agg) -> float:
	"""
	Calculate wind direction average.