Skip to content

Instantly share code, notes, and snippets.

@statwonk
Created March 24, 2024 15:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save statwonk/c7a07d1f79f919bcd82814dd6d86fb6f to your computer and use it in GitHub Desktop.
Save statwonk/c7a07d1f79f919bcd82814dd6d86fb6f to your computer and use it in GitHub Desktop.
A tool to make data engineering easier, written by Cora and Alan. See: https://www.loom.com/share/b5c46b716f23476ba13d22fca1dd1d72
import pandas as pd
import statsmodels.api as sm
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
class AIDataEngineer:
    """Minimal regression pipeline: load a CSV, split it, fit, summarise.

    The steps are meant to be called in order::

        collect_data() -> organize_data() -> train_model()

    ``train_model()`` also calls ``check_summary()``, so after it returns
    both ``model`` and ``summary`` are populated. Calling a step before its
    prerequisite raises ``RuntimeError`` with a message naming the missing
    step.
    """

    def __init__(self):
        # Every attribute starts as None and is filled in by a pipeline step.
        self.data = None      # DataFrame loaded by collect_data()
        self.model = None     # LinearRegression fitted by train_model()
        self.X_train = None   # feature rows of the training split
        self.X_test = None    # feature rows of the held-out split
        self.y_train = None   # target values of the training split
        self.y_test = None    # target values of the held-out split
        self.summary = None   # statsmodels OLS summary from check_summary()

    def collect_data(self, path='mtcars.csv'):
        """Load the dataset at *path* into ``self.data``.

        Args:
            path: Anything ``pandas.read_csv`` accepts. Defaults to
                ``'mtcars.csv'`` in the current working directory.
        """
        self.data = pd.read_csv(path)

    def organize_data(self, features=('cyl', 'wt'), target='mpg',
                      test_size=0.2, random_state=0):
        """Select predictor/target columns and create the train/test split.

        Args:
            features: Column name, or iterable of column names, used as
                predictors.
            target: Name of the response column.
            test_size: Forwarded to ``train_test_split``.
            random_state: Forwarded to ``train_test_split`` so the split is
                reproducible.

        Raises:
            RuntimeError: If ``collect_data()`` has not been called yet.
            KeyError: If a requested column is absent from the data.
        """
        if self.data is None:
            raise RuntimeError(
                "collect_data() must be called before organize_data()"
            )
        # A bare string would otherwise be split into single characters by
        # list(); treat it as one column name.
        if isinstance(features, str):
            features = [features]
        X = self.data[list(features)]
        y = self.data[target]
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            X, y, test_size=test_size, random_state=random_state
        )

    def train_model(self):
        """Fit a ``LinearRegression`` on the training split.

        Also refreshes ``self.summary`` via ``check_summary()``.

        Raises:
            RuntimeError: If ``organize_data()`` has not been called yet.
        """
        self._require_split('train_model')
        self.model = LinearRegression()
        self.model.fit(self.X_train, self.y_train)
        self.check_summary()

    def check_summary(self):
        """Fit a statsmodels OLS on the training split and store its summary.

        Raises:
            RuntimeError: If ``organize_data()`` has not been called yet.
        """
        self._require_split('check_summary')
        # sm.OLS does not add an intercept on its own, so append a constant
        # column to the design matrix first.
        design = sm.add_constant(self.X_train)
        self.summary = sm.OLS(self.y_train, design).fit().summary()

    def _require_split(self, caller):
        """Raise ``RuntimeError`` if the training split is not available."""
        if self.X_train is None or self.y_train is None:
            raise RuntimeError(
                f"organize_data() must be called before {caller}()"
            )
def main():
    """Run the pipeline end to end and print the OLS summary.

    Calls collect_data(), organize_data() and train_model() with no
    arguments, then prints the ``summary`` attribute that train_model()
    populates.
    """
    ai_data_engineer = AIDataEngineer()
    ai_data_engineer.collect_data()
    ai_data_engineer.organize_data()
    ai_data_engineer.train_model()
    print(ai_data_engineer.summary)


# Keep the work inside main() so running the file as a script does not leave
# pipeline variables behind as module globals.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment