Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
from evalml.automl import AutoMLSearch
from evalml.preprocessing import split_data
import pandas as pd
# Load the taxi dataset and pull the target column out of the feature frame.
X = pd.read_csv("/Users/freddy.boulton/Downloads/nyc_taxi.csv")
y = X.pop("trip_duration")

# Split features/target into training and validation halves (test_size=0.5)
# using a fixed random seed.
X_train, X_validation, y_train, y_validation = split_data(
    X,
    y,
    problem_type="regression",
    test_size=0.5,
    random_seed=0,
)

# Run a one-batch regression search on the training half. The search is asked
# not to train the best pipeline itself (train_best_pipeline=False); training
# is done explicitly below.
automl = AutoMLSearch(
    X_train=X_train,
    y_train=y_train,
    problem_type="regression",
    max_batches=1,
    train_best_pipeline=False,
)
automl.search()

# Fetch every pipeline listed in the rankings table, keyed by its id, and
# train them all on the training data held by the search object.
pipelines = {pid: automl.get_pipeline(pid) for pid in automl.rankings.id}
trained_pipeline_dic = automl.train_pipelines(pipelines.values())

# Score the trained pipelines on the validation half with the search objective.
validation_result = automl.score_pipelines(
    trained_pipeline_dic.values(),
    X_validation,
    y_validation,
    [automl.objective],
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment