Skip to content

Instantly share code, notes, and snippets.

@zeryx
Last active August 9, 2022 17:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save zeryx/22e73959db6b010eec06301349dbbf3a to your computer and use it in GitHub Desktop.
Save zeryx/22e73959db6b010eec06301349dbbf3a to your computer and use it in GitHub Desktop.
Fashion MNIST deployment on Algorithmia + DR MLOps Monitoring
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"!pip install datarobot-mlops-connected-client torch torchvision algorithmia==2.0.5"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"from datarobot.mlops.connected.client import MLOpsClient\n",
"import Algorithmia\n",
"from uuid import uuid4\n",
"\n",
"datarobot_api_token = \"DATAROBOT_API_TOKEN_HERE\"\n",
"algorithmia_api_key = \"ALGORITHMIA_API_KEY_HERE\"\n",
"algorithm_name = \"fashion_mnist_mlops\"\n",
"algorithmia_endpoint = \"https://api.algorithmia.com\"\n",
"datarobot_endpoint = \"https://app.datarobot.com\""
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"mlops_connected_client = MLOpsClient(datarobot_endpoint, datarobot_api_token)\n",
"\n",
"# Add training_data to the model configuration; we need to add the target from our dataset, which has 10 classes.\n",
"model_info = {\n",
" \"name\": \"FashionMNIST Pytorch Model\",\n",
" \"target\": {\"name\": \"label\", \"type\": \"Multiclass\", \"classNames\": [\"0\",\"1\",\"2\",\"3\",\"4\",\"5\",\"6\",\"7\",\"8\",\"9\"]},\n",
"}\n",
"\n",
"# Create the model package\n",
"print('Create model package')\n",
"model_pkg_id = mlops_connected_client.create_model_package(model_info)\n",
"model_pkg = mlops_connected_client.get_model_package(model_pkg_id)\n",
"model_id = model_pkg[\"modelId\"]\n",
"\n",
"# Create Prediction Environment (needed for Challengers)\n",
"print('Create Prediction Environment')\n",
"predEnv = {\"name\": \"External Prediction Environment / Algorithmia\",\n",
" \"description\": \"Running on Algorithmia\",\n",
" \"platform\": 'other',\n",
" \"supportedModelFormats\": ['externalModel']\n",
" }\n",
"prediction_environment_id = mlops_connected_client.create_prediction_environment(predEnv)\n",
"\n",
"# Deploy the model package\n",
"print('Deploy model package')\n",
"\n",
"# Give the deployment a name:\n",
"DEPLOYMENT_NAME = f\"Fashion MNIST Classification Model {str(uuid4())}\"\n",
"\n",
"deployment_id = mlops_connected_client.deploy_model_package(model_pkg[\"id\"],\n",
" DEPLOYMENT_NAME,\n",
" prediction_environment_id=prediction_environment_id)\n",
"print(\"successfully created deployment\")"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"print(f\"model id: {model_id}\\ndeployment id: {deployment_id}\")\n",
"# Now that we have the model ID and the deployment ID, let's train and build our ML model."
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"## If you want to train a new model, call this block\n",
"\n",
"import torch\n",
"import torch.nn as nn\n",
"import torch.nn.functional as F\n",
"import torch.optim as optim\n",
"import torchvision\n",
"\n",
"# Set the hyperparameters for this training project\n",
"n_epochs = 10\n",
"batch_size = 64\n",
"learning_rate = 0.001\n",
"\n",
"# Get torch modules defined and dataset loaded\n",
"\n",
"torch.multiprocessing.freeze_support()\n",
"\n",
"trainset = torchvision.datasets.FashionMNIST(root='./data', train=True, download=True,\n",
" transform=torchvision.transforms.ToTensor())\n",
"testset = torchvision.datasets.FashionMNIST(root='./data', train=False, download=True,\n",
" transform=torchvision.transforms.ToTensor())\n",
"\n",
"trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)\n",
"testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=True, num_workers=2)\n",
"\n",
"# Build the neural network, expand on top of nn.Module\n",
"\n",
"class Network(nn.Module):\n",
" def __init__(self):\n",
" super().__init__()\n",
"\n",
" # define layers\n",
" self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)\n",
" self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)\n",
"\n",
" self.fc1 = nn.Linear(in_features=12 * 4 * 4, out_features=256)\n",
" self.fc2 = nn.Linear(in_features=256, out_features=120)\n",
" self.out = nn.Linear(in_features=120, out_features=10)\n",
"\n",
" # define forward function\n",
" def forward(self, t):\n",
" # conv 1\n",
" t = self.conv1(t)\n",
" t = F.relu(t)\n",
" t = F.max_pool2d(t, kernel_size=2, stride=2)\n",
"\n",
" # conv 2\n",
" t = self.conv2(t)\n",
" t = F.relu(t)\n",
" t = F.max_pool2d(t, kernel_size=2, stride=2)\n",
"\n",
" # fc1\n",
" t = t.reshape(-1, 12 * 4 * 4)\n",
" t = self.fc1(t)\n",
" t = F.relu(t)\n",
"\n",
" # fc2\n",
" t = self.fc2(t)\n",
" t = F.relu(t)\n",
"\n",
" # output\n",
" t = self.out(t)\n",
" # No softmax needed here: nn.CrossEntropyLoss (used as the loss below) applies log-softmax to these raw logits internally.\n",
"\n",
" return t\n",
"# As this model will be running in production, we compile it to TorchScript with torch.jit.script\n",
"# (scripting, not tracing) to make it faster.\n",
"\n",
"model = torch.jit.script(Network())\n",
"\n",
"#Define the loss function\n",
"criterion = nn.CrossEntropyLoss()\n",
"\n",
"# Define the optimizer\n",
"optimizer = optim.Adam(model.parameters(), lr=learning_rate)\n",
"\n",
"## Train the model\n",
"\n",
"model.train()\n",
"i = 0\n",
"for j in range(n_epochs):\n",
" print(\"epoch: \", j)\n",
" for data, target in trainloader:\n",
" i += 1\n",
" optimizer.zero_grad()\n",
" output = model(data)\n",
" loss = criterion(output, target)\n",
" loss.backward()\n",
" optimizer.step()\n",
" if i % 100 == 0:\n",
" # data_example = data\n",
" print('Loss: {:.6f}'.format(loss))\n",
"torch.jit.save(model, 'model.t5')\n",
"print(\"Model Trained and Saved\")\n",
"\n",
"\n",
"## Test the model\n",
"# Reload the saved TorchScript file and report the average loss and accuracy on the test set.\n",
"\n",
"test_model = torch.jit.load('model.t5')\n",
"test_model.eval()  # inference mode for evaluation\n",
"test_loss = 0.0\n",
"correct = 0\n",
"with torch.no_grad():\n",
"    for data, target in testloader:\n",
"        output = test_model(data)\n",
"        # criterion returns the MEAN loss of the batch, so weight it by the batch size;\n",
"        # dividing the total by the dataset size below then gives a true per-sample average.\n",
"        test_loss += criterion(output, target).item() * data.size(0)\n",
"        # The predicted class is the index of the largest logit.\n",
"        pred = output.argmax(dim=1)\n",
"        correct += pred.eq(target).sum().item()\n",
"test_loss /= len(testloader.dataset)\n",
"print('\\nTest set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\\n'.format(\n",
"    test_loss, correct, len(testloader.dataset),\n",
"    100. * correct / len(testloader.dataset)))"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# Create Algorithmia Client instance\n",
"import time\n",
"\n",
"client = Algorithmia.client(api_key=algorithmia_api_key, api_address=algorithmia_endpoint)\n",
"USERNAME = client.username()\n",
"# Tag the model with the whole-second Unix timestamp: fixed-width and sortable, unlike the\n",
"# fractional digits of time.time(), which vary in length and can collapse to just \"0\".\n",
"MODEL_VERSION = str(int(time.time()))\n",
"algorithm = f\"algo://{USERNAME}/{algorithm_name}\""
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"models_collection = f\"data://{USERNAME}/mnist_models\"\n",
"examples_collection = f\"data://{USERNAME}/mnist_examples\"\n",
"\n",
"remote_model_path = f\"{models_collection}/{MODEL_VERSION}.t5\"\n",
"\n",
"## Save model to Algorithmia\n",
"# Using the Algorithmia client, let's upload the model file to Algorithmia with a unique filename.\n",
"\n",
"local_model_path = \"./model.t5\"\n",
"# Let's make sure that both the mnist_examples and mnist_models directories exist.\n",
"if client.dir(models_collection).exists() is False:\n",
" client.dir(models_collection).create()\n",
" print(\"Successfully created models collection\")\n",
"else:\n",
" print(\"models_collection already exists\")\n",
"if client.dir(examples_collection).exists() is False:\n",
" client.dir(examples_collection).create()\n",
" print(\"Successfully created examples collection\")\n",
"else:\n",
" print(\"examples_collection already exists\")\n",
"\n",
"# Finally, let's upload our model to Algorithmia.\n",
"client.file(remote_model_path).putFile(local_model_path)\n",
"print(\"Successfully uploaded model file to Algorithmia\")"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# Let's list the available Python 3 environments and select the Python 3.9 one with DR MLOps enabled.\n",
"environments = client.get_environment(\"python3\")\n",
"environment_name = \"Python 3.9 + MLOps Agent, Fashion MNIST edition\"\n",
"environment_id = None\n",
"for result in environments['environments']:\n",
" if result['display_name'] == environment_name:\n",
" print(f\"{result['display_name']} - {result['id']}\")\n",
" environment_id = result['id']\n",
"if environment_id is None:\n",
" raise Exception(f\"Could not find environment with name {environment_name}\")"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# Finally, let's create the algorithm.\n",
"algo = client.algo(algorithm)\n",
"if algo.exists():\n",
" raise Exception(\"Algorithm already exists\")\n",
"details = {\n",
" \"label\": f\"{algo.algoname}\",\n",
"}\n",
"settings = {\n",
" \"license\": \"apl\",\n",
" \"source_visibility\": \"closed\",\n",
" \"algorithm_environment\": environment_id,\n",
" \"network_access\": \"full\",\n",
" \"pipeline_enabled\": True\n",
"}\n",
"response = algo.create(details = details, settings=settings)\n",
"print(\"Algorithm Created\")\n",
"algo.compile()\n",
"print(\"compile completed\")\n",
"# To make this completely unattended, we deploy our variables as \"secrets\"; you can think of these as environment variables.\n",
"algo.set_secret(short_name = \"model_id\", secret_key = \"MODEL_ID\", secret_value = model_id)\n",
"algo.set_secret(short_name = \"deploy_id\", secret_key = \"DEPLOYMENT_ID\", secret_value = deployment_id)\n",
"algo.set_secret(short_name = \"model_path\", secret_key = \"MODEL_PATH\", secret_value = remote_model_path)\n",
"algo.set_secret(short_name= \"datarobot_endpoint\", secret_key=\"MLOPS_SERVICE_URL\", secret_value=datarobot_endpoint)\n",
"algo.set_secret(short_name = \"dr_mlops_token\", secret_key = \"DATAROBOT_MLOPS_API_TOKEN\", secret_value = datarobot_api_token)\n",
"algo.compile()\n",
"algo.publish(version_info={\n",
" \"version_type\": \"minor\",\n",
" \"release_notes\": \"Automatically deployed by Notebook\",\n",
"},\n",
" details={\"label\": \" \"}\n",
")\n",
"print(\"Algorithm has been published and ready for testing.\")"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"algo = client.algo(f\"algo://{USERNAME}/{algorithm_name}/latestPrivate\")\n",
"print(algo.pipe(\"data://zeryx/mnist_examples/boot.jpg\"))\n",
"\n",
"print(\"algorithm is returning correctly, lets check the dashboard\")\n",
"print(algo.url)\n",
"\n",
"print(f\"click {datarobot_endpoint}/deployments/{deployment_id}/overview to see the fully setup MLOps dashboard\")"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment