Skip to content

Instantly share code, notes, and snippets.

@agyaatcoder
agyaatcoder / vllm_openai_compatible_mixtral.py
Created April 24, 2024 19:11
LLM inference on Modal Labs through the vLLM engine. This exposes an endpoint that is compatible with the OpenAI Python client.
import os
import subprocess
from modal import Image, Secret, Stub, enter, gpu, method, web_server
# Directory inside the container image where model weights are cached/stored.
MODEL_DIR = "/model"
# Hugging Face repo ID of the model served by vLLM.
BASE_MODEL = "mistralai/Mixtral-8x7B-Instruct-v0.1"
@agyaatcoder
agyaatcoder / llama8b-instruct.py
Last active April 18, 2024 16:54
Script to get the Llama-3-8B-Instruct model running on Modal Labs
# Meta-Llama-3-8B-Instruct is a gated model; request access on Hugging Face first to be able to run this successfully.
import os
import subprocess
from modal import Image, Secret, Stub, gpu, web_server
# Directory inside the container image where model weights are cached/stored.
MODEL_DIR = "/model"
# Hugging Face repo ID of the model to serve.
MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"
# Text Generation Inference server image used to serve the model.
DOCKER_IMAGE = "ghcr.io/huggingface/text-generation-inference:1.4"
# Port the inference server listens on inside the container.
PORT = 8000
@agyaatcoder
agyaatcoder / hf-tei-modal-labs.py
Last active April 24, 2024 08:53
Huggingface Text Embedding Inference on Modal Labs
import subprocess
import os
from pathlib import Path
import socket
from modal import Image, Mount, Stub, Secret, web_server, gpu
# Hugging Face repo ID of the embedding model to serve.
MODEL_ID = "BAAI/bge-small-en-v1.5"
# Port the embedding server listens on inside the container.
PORT = 8080
# Text Embeddings Inference server image (CUDA-arch-specific tag: 86-0.4.0).
DOCKER_IMAGE = "ghcr.io/huggingface/text-embeddings-inference:86-0.4.0"
@agyaatcoder
agyaatcoder / hf-tgi-modal-labs.py
Last active April 16, 2024 06:07
A script for running Hugging Face Text Generation Inference on Modal Labs
import os
import subprocess
from modal import Image, Secret, Stub, enter, gpu, method, web_server
# Constants for the model and deployment setup.
# Directory inside the container image where model weights are cached/stored.
MODEL_DIR = "/model"
# Hugging Face repo ID of the (AWQ-quantized) model to serve.
MODEL_ID = "TheBloke/Mistral-7B-Instruct-v0.2-AWQ"
# Quantization scheme passed to the TGI server; must match the checkpoint above.
QUANTIZATION = "awq"
# Text Generation Inference server image used to serve the model.
DOCKER_IMAGE = "ghcr.io/huggingface/text-generation-inference:1.4"