# Source: GitHub gist BexTuychiev/e3ecdbf0e6feabc84f4ebd32ba17dd38
# Author: @BexTuychiev
# Created: June 30, 2024 06:34
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
import gradio as gr
# Fetch the diamonds data through seaborn's dataset loader.
diamonds = sns.load_dataset("diamonds")

# "price" is the regression target; every remaining column is a feature.
y = diamonds["price"]
X = diamonds.drop(columns="price")

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
# Column groups that receive different preprocessing.
categorical_features = ["cut", "color", "clarity"]
numeric_features = ["carat", "depth", "table", "x", "y", "z"]

# Numeric columns are standardized; categorical columns are one-hot encoded
# with the first category of each feature dropped.
preprocessor = ColumnTransformer(
    [
        ("num", StandardScaler(), numeric_features),
        ("cat", OneHotEncoder(drop="first"), categorical_features),
    ]
)
# Chain the preprocessing step with a seeded 100-tree random forest regressor.
model = Pipeline(
    steps=[
        ("preprocessor", preprocessor),
        ("regressor", RandomForestRegressor(random_state=42, n_estimators=100)),
    ]
)

# Train the full pipeline (preprocessing + regressor) on the training split.
model.fit(X_train, y_train)
# (GitHub page footer: sign up or sign in on GitHub to comment on this gist.)