pycaret’s gists

## deployment_28042020.py
# Load the 'insurance' sample dataset from the pycaret repository
from pycaret.datasets import get_data
insurance = get_data('insurance')

# Initialize the regression environment with 'charges' as the target
from pycaret.regression import *
# NOTE(review): this call was left unclosed in the listing; it is closed here
# with only the arguments that were visible. Any further arguments are not
# shown - confirm against the original gist.
r1 = setup(
    insurance,
    target='charges',
    session_id=123,
    normalize=True,
    polynomial_features=True,
    trigonometry_features=True,
    feature_interaction=True,
)

## ignore_features.py
from pycaret.clustering import *

# Set up the clustering experiment, handing the two named columns to
# ignore_features
clu1 = setup(
    data,
    ignore_features=['Country Name', 'Indicator Name'],
)

## define_data_types.py
# Bring the regression API into scope
from pycaret.regression import *

# Set up the experiment, declaring explicitly which columns are categorical
# and which are numeric; silent=True is passed as well
reg1 = setup(
    data,
    target='charges',
    silent=True,
    categorical_features=['sex', 'smoker', 'region', 'children'],
    numeric_features=['age', 'bmi'],
)

## tune_model_n_iter.py
# Tune with the default number of iterations (n_iter = 10)
tuned_dt1 = tune_model('dt')

# Tune again with the iteration count raised to 50
tuned_dt2 = tune_model(
    'dt',
    n_iter=50,
)

## tune_model_unsupervised.py
# Tune the 'lda' model against the supervised target 'status',
# with 'xgboost' given as the estimator
tuned_lda = tune_model(
    model='lda',
    supervised_target='status',
    estimator='xgboost',
)

## optimize_threshold_predict_model.py
# Predict with the xgboost model, passing a probability threshold of 0.2
predict_model(
    xgboost,
    probability_threshold=0.2,
)

## predict_model.py
# pandas provides read_csv, used below; imported here so the snippet does not
# depend on an import made elsewhere
import pandas as pd

# train a catboost model
catboost = create_model('catboost')

# predict on the holdout set (no data argument is passed)
pred_holdout = predict_model(catboost)

# predict on a new dataset loaded from a CSV file
new_data = pd.read_csv('new-data.csv')
pred_new = predict_model(catboost, data=new_data)

## setup.py
# Bring the classification API into scope
from pycaret.classification import *

# Notebook environment: only the data and the target are passed
clf1 = setup(
    data,
    target='target-variable',
)

# Outside of a notebook environment: html=False is passed as well
clf1 = setup(
    data,
    target='target-variable',
    html=False,
)

# Remote execution such as Kaggle / GitHub actions / CI-CD pipelines
# NOTE(review): the call for this case is not shown in this listing -
# presumably cut off; confirm against the original gist

## tune_model.py
# Bring the classification API into scope
from pycaret.classification import *

# Set up the experiment
clf1 = setup(
    data,
    target='name-of-target',
)

# Train a decision tree model
dt = create_model('dt')

# Tune hyperparameters of the decision tree
# NOTE(review): the tuning call itself is not shown in this listing -
# presumably cut off; confirm against the original gist

## mlflow.py
# Bring the classification API into scope
from pycaret.classification import *

# Set up the experiment with log_experiment switched on and an explicit
# experiment name
clf1 = setup(
    data,
    target='name-of-target',
    log_experiment=True,
    experiment_name='exp-name-here',
)

# Compare models and keep the returned result
best = compare_models()

# Start the mlflow server on localhost:5000 (when using notebook)
# NOTE(review): the command itself is not shown in this listing -
# presumably cut off; confirm against the original gist
	# Load the 'insurance' sample dataset from the pycaret repository
	from pycaret.datasets import get_data
	insurance = get_data('insurance')

	# Initialize the regression environment with 'charges' as the target
	from pycaret.regression import *
	# NOTE(review): this call was left unclosed in the listing; it is closed
	# here with only the arguments that were visible. Any further arguments
	# are not shown - confirm against the original gist.
	r1 = setup(
		insurance,
		target='charges',
		session_id=123,
		normalize=True,
		polynomial_features=True,
		trigonometry_features=True,
		feature_interaction=True,
	)
	from pycaret.clustering import *

	# Set up the clustering experiment, handing the two named columns to
	# ignore_features
	clu1 = setup(
		data,
		ignore_features=['Country Name', 'Indicator Name'],
	)
	# Bring the regression API into scope
	from pycaret.regression import *

	# Set up the experiment, declaring explicitly which columns are
	# categorical and which are numeric; silent=True is passed as well
	reg1 = setup(
		data,
		target='charges',
		silent=True,
		categorical_features=['sex', 'smoker', 'region', 'children'],
		numeric_features=['age', 'bmi'],
	)
	# Tune with the default number of iterations (n_iter = 10)
	tuned_dt1 = tune_model('dt')

	# Tune again with the iteration count raised to 50
	tuned_dt2 = tune_model(
		'dt',
		n_iter=50,
	)
	# pandas provides read_csv, used below; imported here so the snippet does
	# not depend on an import made elsewhere
	import pandas as pd

	# train a catboost model
	catboost = create_model('catboost')

	# predict on the holdout set (no data argument is passed)
	pred_holdout = predict_model(catboost)

	# predict on a new dataset loaded from a CSV file
	new_data = pd.read_csv('new-data.csv')
	pred_new = predict_model(catboost, data=new_data)
	# Bring the classification API into scope
	from pycaret.classification import *

	# Notebook environment: only the data and the target are passed
	clf1 = setup(
		data,
		target='target-variable',
	)

	# Outside of a notebook environment: html=False is passed as well
	clf1 = setup(
		data,
		target='target-variable',
		html=False,
	)

	# Remote execution such as Kaggle / GitHub actions / CI-CD pipelines
	# NOTE(review): the call for this case is not shown in this listing -
	# presumably cut off; confirm against the original gist
	# Bring the classification API into scope
	from pycaret.classification import *

	# Set up the experiment
	clf1 = setup(
		data,
		target='name-of-target',
	)

	# Train a decision tree model
	dt = create_model('dt')

	# Tune hyperparameters of the decision tree
	# NOTE(review): the tuning call itself is not shown in this listing -
	# presumably cut off; confirm against the original gist