Skip to content

Instantly share code, notes, and snippets.

@agyaatcoder
Last active April 16, 2024 06:07
Show Gist options
  • Save agyaatcoder/d5debf3966ce4434c19aefa260e1b7e3 to your computer and use it in GitHub Desktop.
Save agyaatcoder/d5debf3966ce4434c19aefa260e1b7e3 to your computer and use it in GitHub Desktop.
A script for running Hugging Face Text Generation Inference (TGI) on Modal Labs.
import os
import subprocess
from modal import Image, Secret, Stub, enter, gpu, method, web_server
# Constants for the model and deployment setup.
# NOTE(review): MODEL_DIR is not referenced anywhere in the visible code —
# confirm whether it is still needed.
MODEL_DIR = "/model"
# Hugging Face Hub repository id; handed to `text-generation-server
# download-weights` when the image is built and to the launcher's --model-id.
MODEL_ID = "TheBloke/Mistral-7B-Instruct-v0.2-AWQ"
# Quantization scheme name; the launcher is started with this same value
# for --quantize.
QUANTIZATION = "awq"
# Registry image that the Modal image is built from (see Image.from_registry).
DOCKER_IMAGE = "ghcr.io/huggingface/text-generation-inference:1.4"
# Port given both to the launcher (--port) and to the @web_server decorator.
PORT = 8000
def download_model():
    """
    Fetch the weights for MODEL_ID by invoking
    `text-generation-server download-weights`.

    The child process receives a copy of the current environment with
    HUGGING_FACE_HUB_TOKEN set to the value of HF_TOKEN.

    Raises:
        KeyError: if HF_TOKEN is not set in the environment.
        subprocess.CalledProcessError: if the download command exits with a
            non-zero status (because of check=True).
    """
    command = ["text-generation-server", "download-weights", MODEL_ID]

    # Copy the environment rather than mutating os.environ, so the extra
    # token variable is only visible to the child process.
    child_env = dict(os.environ)
    child_env["HUGGING_FACE_HUB_TOKEN"] = os.environ["HF_TOKEN"]

    subprocess.run(command, env=child_env, check=True)
# GPU type requested for the serving function (passed as `gpu=` to
# @stub.function on run_server).
GPU_CONFIG = gpu.A10G()
# Modal stub that run_server is registered on via @stub.function.
stub = Stub("text-generation-inference-2")
# Image used for running the model server, built in three chained steps:
#   1. start from DOCKER_IMAGE, asking Modal to add Python 3.10;
#   2. emit an `ENTRYPOINT []` Dockerfile command — presumably to clear the
#      base image's entrypoint so it does not run in place of Modal's own
#      command (NOTE(review): confirm against the Modal docs);
#   3. call download_model through Image.run_function with a 20-minute
#      timeout and the "huggingface-secret" secret. download_model reads
#      HF_TOKEN, which this secret is presumably expected to provide.
tgi_image = (
Image.from_registry(DOCKER_IMAGE, add_python="3.10")
.dockerfile_commands("ENTRYPOINT []")
.run_function(download_model, timeout=60 * 20, secrets=[Secret.from_name("huggingface-secret")])
)
@stub.function(
    image=tgi_image,
    gpu=GPU_CONFIG,
    concurrency_limit=1,
)
@web_server(port=PORT, startup_timeout=120)
def run_server():
    """
    Start `text-generation-launcher` for MODEL_ID as a background process.

    The launcher is told to bind to 0.0.0.0 on PORT (the same port given to
    the @web_server decorator) and to use the QUANTIZATION scheme.
    `subprocess.Popen` does not wait for the child, so this function returns
    as soon as the process has been spawned while the server keeps running.
    """
    # An argument list (no shell=True) avoids shell parsing of interpolated
    # values, and QUANTIZATION is used so the flag cannot drift from the
    # module-level constant.
    launch_cmd = [
        "text-generation-launcher",
        "--model-id",
        MODEL_ID,
        "--hostname",
        "0.0.0.0",
        "--port",
        str(PORT),
        "--quantize",
        QUANTIZATION,
    ]
    subprocess.Popen(launch_cmd)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment