Skip to content

Instantly share code, notes, and snippets.

@zeryx
Last active December 13, 2021 16:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save zeryx/5a8e1bcf546fe43eccaf478a829ba69a to your computer and use it in GitHub Desktop.
Save zeryx/5a8e1bcf546fe43eccaf478a829ba69a to your computer and use it in GitHub Desktop.
scikit-learn Algorithmia demo that uses the Model Manifest system to immutably tie model data and code together
from Algorithmia import ADK
import joblib
## This function uses the model manifest `state` (or `modelData`) object to automatically fetch the model files defined in the model manifest.
## No client work is required; just make sure the name passed to `get_model` matches the name in your model manifest.
def load(state):
    """Populate ``state`` with the deserialized model and vectorizer.

    For each of the manifest names ``"model"`` and ``"vectorizer"``, the
    value returned by ``state.get_model(name)`` is passed to ``joblib.load``
    and the result is stored back on ``state`` under that same name.

    Args:
        state: Object supporting ``get_model(name)`` and item assignment.
            NOTE(review): presumably the ADK model-manifest state object and
            ``get_model`` yields a local file path -- confirm against the ADK.

    Returns:
        The same ``state`` object, now holding both loaded artifacts.
    """
    # Order matters only for parity with the manifest: model first, then vectorizer.
    for artifact_name in ("model", "vectorizer"):
        source = state.get_model(artifact_name)
        state[artifact_name] = joblib.load(source)
    return state
def apply(input, state):
    """Classify the sentiment of a single review.

    The review is wrapped in a one-element list and passed to
    ``state['vectorizer'].transform``; the result goes to
    ``state['model'].predict`` and only the first prediction is used.

    Args:
        input: The review to classify; echoed back unchanged in the result.
        state: Mapping holding the ``'vectorizer'`` and ``'model'`` objects.

    Returns:
        A dict with ``"Sentiment"`` (``"Negative"`` when the first prediction
        converts to integer 0, ``"Positive"`` otherwise) and ``"review"``
        (the original input).
    """
    features = state['vectorizer'].transform([input])
    predicted_label = int(state['model'].predict(features)[0])
    sentiment = "Negative" if predicted_label == 0 else "Positive"
    return {"Sentiment": sentiment, "review": input}
# Register the `apply` and `load` functions defined above with the ADK.
algorithm = ADK(apply, load)
# Start the ADK, passing a sample review string.
# NOTE(review): presumably this string is only used as the input for local
# (off-platform) runs -- confirm against the Algorithmia ADK documentation.
algorithm.init("Algorithmia is great.")
{
"required_files": [
{
"name":"model",
"source_uri": "data://demo/sentiment_classifier_imbd/passive_aggressive_model_hash.joblib",
"metadata": {
"dataset_md5_checksum": "46a44d32d2c5c07f7f66324bef4c7266"
}
},
{
"name":"vectorizer",
"source_uri": "data://demo/sentiment_classifier_imbd/hash_vectorizer.joblib",
"metadata": {
"dataset_md5_checksum": "46a44d32d2c5c07f7f66324bef4c7266"
}
}
],
"optional_files": []
}
{"required_files": [{"name": "model", "source_uri": "data://demo/sentiment_classifier_imbd/passive_aggressive_model_hash.joblib", "metadata": {"dataset_md5_checksum": "46a44d32d2c5c07f7f66324bef4c7266"}, "md5_checksum": "3ca7d81c325f8a1964bac73edce582cc"}, {"name": "vectorizer", "source_uri": "data://demo/sentiment_classifier_imbd/hash_vectorizer.joblib", "metadata": {"dataset_md5_checksum": "46a44d32d2c5c07f7f66324bef4c7266"}, "md5_checksum": "9c498f1ef9610a714846e843ef26d554"}], "optional_files": [], "timestamp": "1639411596.847047", "lock_checksum": "d6976c758eb949a498d4f0a40a8f250f"}
algorithmia>=1.0.0,<2.0
scikit-learn==0.23.1
pillow==7.2.0
numpy==1.16.0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment