-
-
Save moezali1/f258195ba1c677654abffb0d1acb2cc0 to your computer and use it in GitHub Desktop.
import os

from tqdm import tqdm
from pycaret.regression import *

# Train, select, finalize and persist one regression model per time series.
#
# Expects a pandas DataFrame named `data` to already be in scope with the
# columns used below: 'time_series', 'sales', 'date', 'day_of_year', 'year',
# 'month' and 'day_of_week'.
#
# Results:
#   all_results -- list of one-row DataFrames (top row of each compare grid)
#   final_model -- dict mapping time-series id -> finalized model
#   skipped_ts  -- dict mapping time-series id -> reason it was skipped

# save_model() does not create missing directories, so make sure the output
# folder exists before the loop (otherwise joblib raises FileNotFoundError).
model_dir = 'trained_models'
os.makedirs(model_dir, exist_ok=True)

all_ts = data['time_series'].unique()
all_results = []
final_model = {}
skipped_ts = {}

for i in tqdm(all_ts):
    df_subset = data[data['time_series'] == i]

    # initialize setup from pycaret.regression
    # NOTE(review): with data_split_shuffle=False the hold-out is the tail of
    # df_subset -- presumably rows are already sorted by date; confirm upstream.
    try:
        s = setup(df_subset, target='sales', train_size=0.95,
                  data_split_shuffle=False, fold_strategy='timeseries', fold=3,
                  ignore_features=['date', 'time_series'],
                  numeric_features=['day_of_year', 'year'],
                  categorical_features=['month', 'day_of_week'],
                  silent=True, verbose=False, session_id=123)
    except ValueError as err:
        # Raised e.g. when the series has too few rows to produce a
        # non-empty train/test split; skip it instead of aborting the batch.
        skipped_ts[i] = str(err)
        tqdm.write(f'Skipping {i}: setup failed ({err})')
        continue

    # compare all models and select best one based on MAE
    best_model = compare_models(sort='MAE', verbose=False)

    # compare_models returns an empty list when every candidate failed to
    # train; finalize_model would then raise
    # "Estimator [] does not have the required fit() method."
    if not hasattr(best_model, 'fit'):
        skipped_ts[i] = 'compare_models returned no trained model'
        tqdm.write(f'Skipping {i}: no model could be trained '
                   f'({len(df_subset)} rows)')
        continue

    # capture the compare result grid and store best model in list
    # (.copy() so the column assignment does not write into a slice view)
    p = pull().iloc[0:1].copy()
    p['time_series'] = str(i)
    all_results.append(p)

    # finalize model i.e. fit on entire data including test set
    f = finalize_model(best_model)

    # attach final model to a dictionary
    final_model[i] = f

    # save transformation pipeline and model as pickle file
    save_model(f, model_name=f'{model_dir}/{i}', verbose=False)
I am facing the same issue.
0%
0/50 [00:00<?, ?it/s]
ValueError Traceback (most recent call last)
in
27
28 # finalize model i.e. fit on entire data including test set
---> 29 f = finalize_model(best_model)
30
31 # attach final model to a dictionary
~\AppData\Roaming\Python\Python38\site-packages\pycaret\regression.py in finalize_model(estimator, fit_kwargs, groups, model_only)
1789 """
1790
-> 1791 return pycaret.internal.tabular.finalize_model(
1792 estimator=estimator,
1793 fit_kwargs=fit_kwargs,
~\AppData\Roaming\Python\Python38\site-packages\pycaret\internal\tabular.py in finalize_model(estimator, fit_kwargs, groups, model_only, display)
8729 logger.info(f"Finalizing {estimator}")
8730 display.clear_output()
-> 8731 model_final, model_fit_time = create_model_supervised(
8732 estimator=estimator,
8733 verbose=False,
~\AppData\Roaming\Python\Python38\site-packages\pycaret\internal\tabular.py in create_model_supervised(estimator, fold, round, cross_validation, predict, fit_kwargs, groups, refit, verbose, system, X_train_data, y_train_data, metrics, display, **kwargs)
2903 )
2904 elif not hasattr(estimator, "fit"):
-> 2905 raise ValueError(
2906 f"Estimator {estimator} does not have the required fit() method."
2907 )
ValueError: Estimator [] does not have the required fit() method.
Can confirm that this happened to me as well, using Azure Databricks.
Edit: It seems to come from the tqdm() loop, where df_subset = data[data['time_series'] == i] yields only 1 sample:
ValueError: With n_samples=1, test_size=0.050000000000000044 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.
Yes it is trying to split 1 sample row into training and testing dataset which seems impossible.
I resolved this error. It occurred because there was too little data to split into training and testing datasets. You need to remove the items from the Excel sheet that do not have enough data; keep more than 10 rows at least.
Hello Moez,
I'm pretty new to PyCaret and was trying to build a forecasting model for multiple SKUs; however, I'm getting an error at the step below. I've modeled my data as per the tutorial. I think the problem is with 'trained_models/'; in the revision history I also see '/trained_models/', and surprisingly both give the same error. Can you please take a look?
# save transformation pipeline and model as pickle file
save_model(f, model_name='trained_models/' + str(i), verbose=False)
FileNotFoundError Traceback (most recent call last)
/tmp/ipykernel_63/1374943528.py in
3
4 # save transformation pipeline and model as pickle file
----> 5 save_model(f, model_name='trained_models/' + str(i), verbose=False)
/opt/conda/lib/python3.7/site-packages/pycaret/regression.py in save_model(model, model_name, model_only, verbose, **kwargs)
1935 model_only=model_only,
1936 verbose=verbose,
-> 1937 **kwargs,
1938 )
1939
/opt/conda/lib/python3.7/site-packages/pycaret/internal/tabular.py in save_model(model, model_name, model_only, verbose, **kwargs)
9240
9241 return pycaret.internal.persistence.save_model(
-> 9242 model, model_name, None if model_only else prep_pipe, verbose, **kwargs
9243 )
9244
/opt/conda/lib/python3.7/site-packages/pycaret/internal/persistence.py in save_model(model, model_name, prep_pipe_, verbose, **kwargs)
312
313 model_name = f"{model_name}.pkl"
--> 314 joblib.dump(model_, model_name, **kwargs)
315 if verbose:
316 print("Transformation Pipeline and Model Successfully Saved")
/opt/conda/lib/python3.7/site-packages/joblib/numpy_pickle.py in dump(value, filename, compress, protocol, cache_size)
477 NumpyPickler(f, protocol=protocol).dump(value)
478 elif is_filename:
--> 479 with open(filename, 'wb') as f:
480 NumpyPickler(f, protocol=protocol).dump(value)
481 else:
FileNotFoundError: [Errno 2] No such file or directory: 'trained_models/SEND00867.pkl'
Hi Moez, I'm new to PyCaret, and when I tried to recreate a model using this code I always get the error 'ValueError: Estimator [] does not have the required fit() method.' I converted my data to match the exact format that you demonstrated in this code. I'm not sure how I should handle this issue. Any recommendations?