Created
January 15, 2019 23:25
-
-
Save nipra/7c4fcdaf0d87ca818c6cf4fabb1cb674 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# The data set used in this example is from http://archive.ics.uci.edu/ml/datasets/Wine+Quality
# P. Cortez, A. Cerdeira, F. Almeida, T. Matos and J. Reis.
# Modeling wine preferences by data mining from physicochemical properties. In Decision Support Systems, Elsevier, 47(4):547-553, 2009.
import os | |
import warnings | |
import sys | |
import random | |
import pandas as pd | |
import numpy as np | |
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score | |
from sklearn.model_selection import train_test_split | |
from sklearn.linear_model import ElasticNet | |
from mlflow import log_metric, log_param, log_artifacts, get_artifact_uri, active_run,\ | |
get_tracking_uri, log_artifact | |
import mlflow | |
import mlflow.sklearn | |
def eval_metrics(actual, pred):
    """Score regression predictions against the observed targets.

    Args:
        actual: Observed target values.
        pred: Predicted values, aligned with ``actual``.

    Returns:
        A ``(rmse, mae, r2)`` tuple: root mean squared error, mean absolute
        error and the R^2 score, in that order.
    """
    # RMSE is the square root of scikit-learn's mean squared error.
    mean_sq_err = mean_squared_error(actual, pred)
    return (
        np.sqrt(mean_sq_err),
        mean_absolute_error(actual, pred),
        r2_score(actual, pred),
    )
if __name__ == "__main__":
    # Train an ElasticNet regressor on the wine-quality data and record its
    # hyper-parameters, evaluation metrics and fitted model in an MLflow run.
    #
    # Usage: <script> [alpha] [l1_ratio]   (each defaults to 0.5 when omitted)
    mlflow.set_tracking_uri("http://127.0.0.1:5000")
    mlflow.set_experiment("test001")
    print("Running {} with tracking URI {}".format(sys.argv[0], get_tracking_uri()))

    # Start the run explicitly. mlflow.active_run() returns None until a run
    # exists, so reading `.info` from it before any run was started fails with
    # AttributeError. The context manager also ends the run when the block
    # exits, including when an exception is raised.
    with mlflow.start_run() as started_run:
        run_uuid = started_run.info.run_uuid
        service = mlflow.tracking.MlflowClient()
        run = service.get_run(run_uuid)
        print("Metadata & data for run with UUID %s: %s" % (run_uuid, run))

        warnings.filterwarnings("ignore")
        # Seed NumPy's global RNG before the train/test split below.
        np.random.seed(40)

        # Read the wine-quality csv file. The path is resolved relative to this
        # script's own directory, so the current working directory is irrelevant.
        wine_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "wine-quality.csv")
        print("wine path " + wine_path)
        data = pd.read_csv(wine_path)

        # Split the data into training and test sets. (0.75, 0.25) split.
        train, test = train_test_split(data)

        # The predicted column is "quality" which is a scalar from [3, 9]
        train_x = train.drop(["quality"], axis=1)
        test_x = test.drop(["quality"], axis=1)
        train_y = train[["quality"]]
        test_y = test[["quality"]]

        # Hyper-parameters come from the command line when provided.
        alpha = float(sys.argv[1]) if len(sys.argv) > 1 else 0.5
        l1_ratio = float(sys.argv[2]) if len(sys.argv) > 2 else 0.5

        lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
        lr.fit(train_x, train_y)

        predicted_qualities = lr.predict(test_x)
        (rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)

        print("Elasticnet model (alpha=%f, l1_ratio=%f):" % (alpha, l1_ratio))
        print(" RMSE: %s" % rmse)
        print(" MAE: %s" % mae)
        print(" R2: %s" % r2)

        # Record parameters, metrics and the fitted model on the active run.
        mlflow.log_param("alpha", alpha)
        mlflow.log_param("l1_ratio", l1_ratio)
        mlflow.log_metric("rmse", rmse)
        mlflow.log_metric("r2", r2)
        mlflow.log_metric("mae", mae)
        mlflow.sklearn.log_model(lr, "model")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment