# StatsGary/feature_eng_infer.py

## feature_eng_infer.py
import torch
import torch.nn as nn
import numpy as np
import pandas as pd

#Custom model imports
from models.Regression import MLPRegressor

# Build the inference inputs for the medical insurance model: download the
# CSV, label-encode the categorical columns and expose the features/target as
# tensors through the module-level names `cats`, `conts`, `y` and `cat_szs`.
data_name = 'medical_insurance'

# Read in the medical insurance data (fetched over HTTP every time this runs)
df = pd.read_csv('https://raw.githubusercontent.com/StatsGary/Data/main/insurance_prod.csv')
# Drop nulls. NOTE: axis='columns' removes every COLUMN that contains at least
# one missing value; rows are never removed, so the row count is unchanged.
df.dropna(axis='columns', inplace=True)
# Get number of rows
obs = len(df)

#=====================================================================================
# Feature Engineering
#=====================================================================================
# Categorical features (label-encoded below) and continuous features
cat_cols = ['sex', 'smoker', 'region', 'children']
cont_cols = ['age', 'bmi']

# Target (y) column, kept as a list so the selection below stays 2-D
target_cols = ['charges']

# Because nulls are dropped column-wise, a single missing value can remove a
# required column. Fail here with an explicit message instead of an opaque
# lookup error further down (same exception type pandas would raise).
missing_cols = [col for col in cat_cols + cont_cols + target_cols
                if col not in df.columns]
if missing_cols:
    raise KeyError(
        f"Required columns missing from the data after dropping nulls: {missing_cols}"
    )

# CONVERT CATEGORICAL COLUMNS
# NOTE(review): the categories (and therefore the integer codes) are inferred
# from the values present in this file; presumably they must line up with the
# encoding used at training time -- confirm.
for cat in cat_cols:
    df[cat] = df[cat].astype('category')

# Stack the per-column integer codes side by side: one row per observation,
# one column per categorical feature.
cats = np.stack([df[col].cat.codes.values for col in cat_cols], 1)
cats = torch.tensor(cats, dtype=torch.int64)
# Convert continuous variables to a float tensor with the same row layout
conts = np.stack([df[col].values for col in cont_cols], 1)
conts = torch.tensor(conts, dtype=torch.float)
# Create outcome as a single-column float tensor
y = torch.tensor(df[target_cols].values, dtype=torch.float).reshape(-1, 1)
# Number of distinct categories per categorical column (used to set the
# embedding sizes)
cat_szs = [len(df[col].cat.categories) for col in cat_cols]