Last active
January 17, 2024 03:37
-
-
Save jc1518/268cc5b48ae81964d39b20ca259ebb9b to your computer and use it in GitHub Desktop.
deploy_zephyr_7b_beta_in_sagemaker.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Deploy HuggingFaceH4/zephyr-7b-beta to a SageMaker real-time endpoint.

The script resolves an IAM execution role, configures the Hugging Face LLM
container through environment variables, deploys it to a single GPU
instance, and sends one sample chat prompt to the new endpoint.
"""
import json

import boto3
import sagemaker
from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri

# Resolve the IAM role the endpoint will run under. If
# sagemaker.get_execution_role() raises ValueError (presumably when the
# script runs outside a SageMaker-managed environment), fall back to looking
# the role up by its name through IAM.
try:
    role = sagemaker.get_execution_role()
except ValueError:
    iam = boto3.client('iam')
    role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']

# Hub Model configuration. https://huggingface.co/models
# These are passed to the container as environment variables, so numeric
# settings are serialized to strings with json.dumps.
hub = {
    'HF_MODEL_ID': 'HuggingFaceH4/zephyr-7b-beta',
    'SM_NUM_GPUS': json.dumps(1),
    'MAX_TOTAL_TOKENS': json.dumps(4096),
    'MAX_INPUT_LENGTH': json.dumps(3000),
}

# create Hugging Face Model Class
huggingface_model = HuggingFaceModel(
    image_uri=get_huggingface_llm_image_uri("huggingface", version="1.1.0"),
    env=hub,
    role=role,
)

# deploy model to SageMaker Inference
# NOTE: the endpoint created here is not removed by this script. It
# presumably keeps running (and incurring cost) until it is deleted, e.g.
# with predictor.delete_model() and predictor.delete_endpoint().
predictor = huggingface_model.deploy(
    initial_instance_count=1,
    instance_type="ml.g5.2xlarge",
    container_startup_health_check_timeout=300,
)

# send request
# The prompt uses the <|system|> / <|user|> / <|assistant|> chat markers,
# ending on the assistant marker so the model generates the reply.
response = predictor.predict({
    "inputs": (
        "<|system|>\nYou are a pirate chatbot who always responds with Arr!</s>\n"
        "<|user|>\nThere's a llama on my lawn, how can I get rid of him?</s>\n"
        "<|assistant|>\n"
    ),
})

# Show the result; when run as a plain script the return value would
# otherwise be silently discarded.
print(response)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment