-
-
Save zeryx/22e73959db6b010eec06301349dbbf3a to your computer and use it in GitHub Desktop.
Fashion MNIST deployment on Algorithmia + DR MLOps Monitoring
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"outputs": [], | |
"source": [ | |
"!pip install datarobot-mlops-connected-client torch algorithmia==2.0.5" | |
], | |
"metadata": { | |
"collapsed": false, | |
"pycharm": { | |
"name": "#%%\n" | |
} | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"outputs": [], | |
"source": [ | |
"from datarobot.mlops.connected.client import MLOpsClient\n", | |
"import Algorithmia\n", | |
"from uuid import uuid4\n", | |
"\n", | |
"datarobot_api_token = \"DATAROBOT_API_TOKEN_HERE\"\n", | |
"algorithmia_api_key = \"ALGORITHMIA_API_KEY_HERE\"\n", | |
"algorithm_name = \"fashion_mnist_mlops\"\n", | |
"algorithmia_endpoint = \"https://api.algorithmia.com\"\n", | |
"datarobot_endpoint = \"https://app.datarobot.com\"" | |
], | |
"metadata": { | |
"collapsed": false, | |
"pycharm": { | |
"name": "#%%\n" | |
} | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"outputs": [], | |
"source": [ | |
"mlops_connected_client = MLOpsClient(datarobot_endpoint, datarobot_api_token)\n", | |
"\n", | |
"# Add training_data to the model configuration. We need to add the target from our dataset, which has 10 classes.\n", | |
"model_info = {\n", | |
" \"name\": \"FashionMNIST Pytorch Model\",\n", | |
" \"target\": {\"name\": \"label\", \"type\": \"Multiclass\", \"classNames\": [\"0\",\"1\",\"2\",\"3\",\"4\",\"5\",\"6\",\"7\",\"8\",\"9\"]},\n", | |
"}\n", | |
"\n", | |
"# Create the model package\n", | |
"print('Create model package')\n", | |
"model_pkg_id = mlops_connected_client.create_model_package(model_info)\n", | |
"model_pkg = mlops_connected_client.get_model_package(model_pkg_id)\n", | |
"model_id = model_pkg[\"modelId\"]\n", | |
"\n", | |
"# Create Prediction Environment (needed for Challengers)\n", | |
"print('Create Prediction Environment')\n", | |
"predEnv = {\"name\": \"External Prediction Environment / Algorithmia\",\n", | |
" \"description\": \"Running on Algorithmia\",\n", | |
" \"platform\": 'other',\n", | |
" \"supportedModelFormats\": ['externalModel']\n", | |
" }\n", | |
"prediction_environment_id = mlops_connected_client.create_prediction_environment(predEnv)\n", | |
"\n", | |
"# Deploy the model package\n", | |
"print('Deploy model package')\n", | |
"\n", | |
"# Give the deployment a name:\n", | |
"DEPLOYMENT_NAME = f\"Fashion MNIST Classification Model {str(uuid4())}\"\n", | |
"\n", | |
"deployment_id = mlops_connected_client.deploy_model_package(model_pkg[\"id\"],\n", | |
" DEPLOYMENT_NAME,\n", | |
" prediction_environment_id=prediction_environment_id)\n", | |
"print(\"successfully created deployment\")" | |
], | |
"metadata": { | |
"collapsed": false, | |
"pycharm": { | |
"name": "#%%\n" | |
} | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"outputs": [], | |
"source": [ | |
"print(f\"model id: {model_id}\\ndeployment id: {deployment_id}\")\n", | |
"# Now that we have the model id and deployment id, let's train and build our ML model." | |
], | |
"metadata": { | |
"collapsed": false, | |
"pycharm": { | |
"name": "#%%\n" | |
} | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"outputs": [], | |
"source": [ | |
"## If you want to train a new model, call this block\n", | |
"\n", | |
"import torch\n", | |
"import torch.nn as nn\n", | |
"import torch.nn.functional as F\n", | |
"import torch.optim as optim\n", | |
"import torchvision\n", | |
"\n", | |
"# Set the hyperparameters for this training project\n", | |
"n_epochs = 10\n", | |
"batch_size = 64\n", | |
"learning_rate = 0.001\n", | |
"\n", | |
"# Get torch modules defined and dataset loaded\n", | |
"\n", | |
"torch.multiprocessing.freeze_support()\n", | |
"\n", | |
"trainset = torchvision.datasets.FashionMNIST(root='./data', train=True, download=True,\n", | |
" transform=torchvision.transforms.ToTensor())\n", | |
"testset = torchvision.datasets.FashionMNIST(root='./data', train=False, download=True,\n", | |
" transform=torchvision.transforms.ToTensor())\n", | |
"\n", | |
"trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)\n", | |
"testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=True, num_workers=2)\n", | |
"\n", | |
"# Build the neural network, expand on top of nn.Module\n", | |
"\n", | |
"class Network(nn.Module):\n", | |
" def __init__(self):\n", | |
" super().__init__()\n", | |
"\n", | |
" # define layers\n", | |
" self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)\n", | |
" self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)\n", | |
"\n", | |
" self.fc1 = nn.Linear(in_features=12 * 4 * 4, out_features=256)\n", | |
" self.fc2 = nn.Linear(in_features=256, out_features=120)\n", | |
" self.out = nn.Linear(in_features=120, out_features=10)\n", | |
"\n", | |
" # define forward function\n", | |
" def forward(self, t):\n", | |
" # conv 1\n", | |
" t = self.conv1(t)\n", | |
" t = F.relu(t)\n", | |
" t = F.max_pool2d(t, kernel_size=2, stride=2)\n", | |
"\n", | |
" # conv 2\n", | |
" t = self.conv2(t)\n", | |
" t = F.relu(t)\n", | |
" t = F.max_pool2d(t, kernel_size=2, stride=2)\n", | |
"\n", | |
" # fc1\n", | |
" t = t.reshape(-1, 12 * 4 * 4)\n", | |
" t = self.fc1(t)\n", | |
" t = F.relu(t)\n", | |
"\n", | |
" # fc2\n", | |
" t = self.fc2(t)\n", | |
" t = F.relu(t)\n", | |
"\n", | |
" # output\n", | |
" t = self.out(t)\n", | |
" # don't need softmax here since we'll use cross-entropy as the loss, which applies log-softmax internally.\n", | |
"\n", | |
" return t\n", | |
"# As this model will be running in production, we should script it into compiled TorchScript\n", | |
"# to make it faster.\n", | |
"\n", | |
"model = torch.jit.script(Network())\n", | |
"\n", | |
"#Define the loss function\n", | |
"criterion = nn.CrossEntropyLoss()\n", | |
"\n", | |
"# Define the optimizer\n", | |
"optimizer = optim.Adam(model.parameters(), lr=learning_rate)\n", | |
"\n", | |
"## Train the model\n", | |
"\n", | |
"model.train()\n", | |
"i = 0\n", | |
"for j in range(n_epochs):\n", | |
" print(\"epoch: \", j)\n", | |
" for data, target in trainloader:\n", | |
" i += 1\n", | |
" optimizer.zero_grad()\n", | |
" output = model(data)\n", | |
" loss = criterion(output, target)\n", | |
" loss.backward()\n", | |
" optimizer.step()\n", | |
" if i % 100 == 0:\n", | |
" # data_example = data\n", | |
" print('Loss: {:.6f}'.format(loss))\n", | |
"torch.jit.save(model, 'model.t5')\n", | |
"print(\"Model Trained and Saved\")\n", | |
"\n", | |
"\n", | |
"## Test the model\n", | |
"\n", | |
"test_model = torch.jit.load('model.t5')\n", | |
"test_loss = 0\n", | |
"correct = 0\n", | |
"with torch.no_grad():\n", | |
" for data, target in testloader:\n", | |
" output = test_model(data)\n", | |
" test_loss += criterion(output, target)\n", | |
" pred = output.data.max(1, keepdim=True)[1]\n", | |
" correct += pred.eq(target.data.view_as(pred)).sum()\n", | |
"test_loss /= len(testloader.dataset)\n", | |
"print('\\nTest set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\\n'.format(\n", | |
" test_loss, correct, len(testloader.dataset),\n", | |
" 100. * correct / len(testloader.dataset)))" | |
], | |
"metadata": { | |
"collapsed": false, | |
"pycharm": { | |
"name": "#%%\n" | |
} | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"outputs": [], | |
"source": [ | |
"# Create Algorithmia Client instance\n", | |
"import time\n", | |
"\n", | |
"client = Algorithmia.client(api_key=algorithmia_api_key, api_address=algorithmia_endpoint)\n", | |
"USERNAME = client.username()\n", | |
"MODEL_VERSION = str(time.time()).split('.')[1]\n", | |
"algorithm = f\"algo://{USERNAME}/{algorithm_name}\"" | |
], | |
"metadata": { | |
"collapsed": false, | |
"pycharm": { | |
"name": "#%%\n" | |
} | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"outputs": [], | |
"source": [ | |
"models_collection = f\"data://{USERNAME}/mnist_models\"\n", | |
"examples_collection = f\"data://{USERNAME}/mnist_examples\"\n", | |
"\n", | |
"remote_model_path = f\"{models_collection}/{MODEL_VERSION}.t5\"\n", | |
"\n", | |
"## Save model to Algorithmia\n", | |
"# Using the Algorithmia client, let's upload the model file to Algorithmia with a unique filename\n", | |
"\n", | |
"local_model_path = \"./model.t5\"\n", | |
"# Let's make sure that the mnist_examples and mnist_models directories both exist\n", | |
"if client.dir(models_collection).exists() is False:\n", | |
" client.dir(models_collection).create()\n", | |
" print(\"Successfully created models collection\")\n", | |
"else:\n", | |
" print(\"models_collection already exists\")\n", | |
"if client.dir(examples_collection).exists() is False:\n", | |
" client.dir(examples_collection).create()\n", | |
" print(\"Successfully created examples collection\")\n", | |
"else:\n", | |
" print(\"examples_collection already exists\")\n", | |
"\n", | |
"# Finally, let's upload our model to Algorithmia\n", | |
"client.file(remote_model_path).putFile(local_model_path)\n", | |
"print(\"Successfully uploaded model file to Algorithmia\")" | |
], | |
"metadata": { | |
"collapsed": false, | |
"pycharm": { | |
"name": "#%%\n" | |
} | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"outputs": [], | |
"source": [ | |
"# Let's go and find all available environments, and select the Python 3.9 one with DR MLOps enabled.\n", | |
"environments = client.get_environment(\"python3\")\n", | |
"environment_name = \"Python 3.9 + MLOps Agent, Fashion MNIST edition\"\n", | |
"environment_id = None\n", | |
"for result in environments['environments']:\n", | |
" if result['display_name'] == environment_name:\n", | |
" print(f\"{result['display_name']} - {result['id']}\")\n", | |
" environment_id = result['id']\n", | |
"if environment_id is None:\n", | |
" raise Exception(f\"Could not find environment with name {environment_name}\")" | |
], | |
"metadata": { | |
"collapsed": false, | |
"pycharm": { | |
"name": "#%%\n" | |
} | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"outputs": [], | |
"source": [ | |
"# Finally, let's create the Algorithm\n", | |
"algo = client.algo(algorithm)\n", | |
"if algo.exists():\n", | |
" raise Exception(\"Algorithm already exists\")\n", | |
"details = {\n", | |
" \"label\": f\"{algo.algoname}\",\n", | |
"}\n", | |
"settings = {\n", | |
" \"license\": \"apl\",\n", | |
" \"source_visibility\": \"closed\",\n", | |
" \"algorithm_environment\": environment_id,\n", | |
" \"network_access\": \"full\",\n", | |
" \"pipeline_enabled\": True\n", | |
"}\n", | |
"response = algo.create(details = details, settings=settings)\n", | |
"print(\"Algorithm Created\")\n", | |
"algo.compile()\n", | |
"print(\"compile completed\")\n", | |
"# To make this completely unattended, we deploy our variables as \"secrets\"; you can think of these as environment variables\n", | |
"algo.set_secret(short_name = \"model_id\", secret_key = \"MODEL_ID\", secret_value = model_id)\n", | |
"algo.set_secret(short_name = \"deploy_id\", secret_key = \"DEPLOYMENT_ID\", secret_value = deployment_id)\n", | |
"algo.set_secret(short_name = \"model_path\", secret_key = \"MODEL_PATH\", secret_value = remote_model_path)\n", | |
"algo.set_secret(short_name= \"datarobot_endpoint\", secret_key=\"MLOPS_SERVICE_URL\", secret_value=datarobot_endpoint)\n", | |
"algo.set_secret(short_name = \"dr_mlops_token\", secret_key = \"DATAROBOT_MLOPS_API_TOKEN\", secret_value = datarobot_api_token)\n", | |
"algo.compile()\n", | |
"algo.publish(version_info={\n", | |
" \"version_type\": \"minor\",\n", | |
" \"release_notes\": \"Automatically deployed by Notebook\",\n", | |
"},\n", | |
" details={\"label\": \" \"}\n", | |
")\n", | |
"print(\"Algorithm has been published and ready for testing.\")" | |
], | |
"metadata": { | |
"collapsed": false, | |
"pycharm": { | |
"name": "#%%\n" | |
} | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"outputs": [], | |
"source": [ | |
"algo = client.algo(f\"algo://{USERNAME}/{algorithm_name}/latestPrivate\")\n", | |
"print(algo.pipe(\"data://zeryx/mnist_examples/boot.jpg\"))\n", | |
"\n", | |
"print(\"algorithm is returning correctly, lets check the dashboard\")\n", | |
"print(algo.url)\n", | |
"\n", | |
"print(f\"click {datarobot_endpoint}/deployments/{deployment_id}/overview to see the fully setup MLOps dashboard\")" | |
], | |
"metadata": { | |
"collapsed": false, | |
"pycharm": { | |
"name": "#%%\n" | |
} | |
} | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.6" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment