Skip to content

Instantly share code, notes, and snippets.

Avatar

PyCaret pycaret

View GitHub Profile
View pycaret_github_main.yml
name: PyCaret AutoML Git Action
on:
push :
branches: [ master ]
jobs:
build:
runs-on: ubuntu-latest
steps:
- name: PyCaret AutoML Git Action
id: model
View github_action_action.yml
name: "PyCaret AutoML Git Action"
description: "A simple example of AutoML created using PyCaret 2.0"
author: "Moez Ali"
inputs:
DATASET:
description: "Dataset for Training"
required: true
default: "juice"
TARGET:
description: "Name of Target variable"
View Dockerfile
# Image for the action: Python 3.7 (slim variant) with the app's requirements installed.
FROM python:3.7-slim
WORKDIR /app
# COPY instead of ADD: only a plain copy of the build context is needed, and
# COPY has no implicit archive-extraction or URL-fetch behaviour.
COPY . /app
# libgomp1 is the GNU OpenMP runtime (presumably needed by a native dependency
# in requirements.txt - confirm). The apt package lists are removed in the same
# layer so they do not end up in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends libgomp1 \
    && rm -rf /var/lib/apt/lists/*
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir --trusted-host pypi.python.org -r requirements.txt
View github_action_app.py
import os, ast
import pandas as pd
# Read the three action inputs from their INPUT_* environment variables.
dataset, target, usecase = (
    os.environ[key]
    for key in ("INPUT_DATASET", "INPUT_TARGET", "INPUT_USECASE")
)
# URL of <dataset>.csv on the master branch of the repository named by
# GITHUB_REPOSITORY, served from raw.githubusercontent.com.
dataset_path = "".join(
    (
        "https://raw.githubusercontent.com/",
        os.environ["GITHUB_REPOSITORY"],
        "/master/",
        dataset,
        '.csv',
    )
)
# Load the CSV into a DataFrame.
data = pd.read_csv(dataset_path)
# Preview call; its return value is not used.
data.head()
View script.py
# import libraries
import pandas as pd
import sys
# define command line parameters
# The first two command-line arguments are taken positionally as strings;
# an IndexError is raised if fewer than two arguments are supplied.
# NOTE(review): `data` presumably holds a dataset name consumed by the loading
# code that follows - confirm against the rest of the script.
data = sys.argv[1]
target = sys.argv[2]
# load data (replace this part with your own script)
from pycaret.datasets import get_data
View mlflow.py
# import classification module
from pycaret.classification import *
# initialise the experiment with logging switched on under the given name
clf1 = setup(
    data,
    target='name-of-target',
    log_experiment=True,
    experiment_name='exp-name-here',
)
# compare models; the result is kept in `best`
best = compare_models()
# start mlflow server on localhost:5000 (when using notebook)
View utils.py
# select and finalize the best model of the active run (chosen by CV score)
best_model = automl()
# same selection, but judged by 'F1' on the hold-out set
best_model_holdout = automl(optimize='F1', use_holdout=True)
# save model
# NOTE(review): `model` is not defined anywhere in this snippet - presumably
# one of the models selected above is meant; confirm before running.
save_model(model, 'c:/path-to-directory/model-name')
# load model
View setup.py
# Import module
from pycaret.classification import *
# Notebook environment: call setup with the data and target only
clf1 = setup(data, target='target-variable')
# Outside a notebook environment: additionally pass html=False
# (this call rebinds clf1; the two calls are shown as alternatives)
clf1 = setup(data, target='target-variable', html=False)
# Initialize setup (When using remote execution such as Kaggle / GitHub actions / CI-CD pipelines)
View predict_model.py
# fit a model using the 'catboost' estimator id
catboost = create_model('catboost')
# with no data argument, predictions are made on the holdout set
pred_holdout = predict_model(catboost)
# read an unseen dataset from CSV and predict on it
new_data = pd.read_csv('new-data.csv')
pred_new = predict_model(catboost, data=new_data)
View ensemble_model.py
# import classification module
from pycaret.classification import *
# initialise the experiment on `data` with the named target column
clf1 = setup(data, target='name-of-target')
# fit a decision tree ('dt') to serve as the base model
dt = create_model('dt')
# train a bagging classifier on dt
You can’t perform that action at this time.