Skip to content

Instantly share code, notes, and snippets.

@drbh
Created April 17, 2024 02:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save drbh/17f0504f18776c1d30e54cfd2ae56030 to your computer and use it in GitHub Desktop.
Save drbh/17f0504f18776c1d30e54cfd2ae56030 to your computer and use it in GitHub Desktop.
Deploy an Inference Endpoint and call tools through the OpenAI client
from openai import OpenAI
# Placeholders: substitute the deployed endpoint's URL and an access token.
ENDPOINT_URL = "ENDPOINT_URL"
HF_TOKEN = "YOUR_TOKEN"

# OpenAI client whose requests go to the endpoint's "/v1/" route instead of
# the default OpenAI API; the token is passed as the API key.
client = OpenAI(base_url=f"{ENDPOINT_URL}/v1/", api_key=HF_TOKEN)
def _weather_tool(name, description, extra_properties=None):
    """Build one function-tool schema for a weather lookup.

    Every weather tool takes a ``location`` and a temperature ``format``;
    ``extra_properties`` appends further parameters after those two. All
    parameters are marked as required, in declaration order.
    """
    properties = {
        "location": {
            "type": "string",
            "description": "The city and state, e.g. San Francisco, CA",
        },
        "format": {
            "type": "string",
            "enum": ["celsius", "fahrenheit"],
            "description": "The temperature unit to use. Infer this from the users location.",
        },
    }
    if extra_properties:
        properties.update(extra_properties)
    return {
        "type": "function",
        "function": {
            "name": name,
            "description": description,
            "parameters": {
                "type": "object",
                "properties": properties,
                "required": list(properties),
            },
        },
    }


# Tool definitions offered to the model on every chat completion request.
tools = [
    _weather_tool("get_current_weather", "Get the current weather"),
    _weather_tool(
        "get_n_day_weather_forecast",
        "Get an N-day weather forecast",
        {
            "num_days": {
                "type": "integer",
                "description": "The number of days to forecast",
            },
        },
    ),
]
# Send the same prompt once per seed (1 through 199) and print which tool
# call, if any, the model produced for that seed.
for i in range(1, 200):
    try:
        chat_completion = client.chat.completions.create(
            model="tgi",
            messages=[
                {
                    "role": "user",
                    # Swap in a prompt such as "what color is fire?" to see
                    # what the model does when none of the tools apply.
                    "content": "Whats the weather in Paris, France?",
                },
            ],
            tools=tools,
            tool_choice="auto",
            max_tokens=500,
            seed=i,
        )
        message = chat_completion.choices[0].message
        if not message.tool_calls:
            # tool_choice="auto" lets the model reply in plain text, in which
            # case there is no tool call to index into; report that
            # explicitly instead of failing on a missing list.
            print(i, "no tool call:", message.content)
            continue
        called = message.tool_calls[0].function
        print(i, called)
    except Exception as e:
        # Best-effort sweep: report the failing seed and move on to the next.
        print(i, e)
# Deploy the endpoint: POST the JSON definition below (compute, model image,
# name, provider, access type) to the Inference Endpoints API.
# Replace YOUR_TOKEN with a real access token before running.
curl --request POST \
  --header "Content-Type: application/json" \
  --header "Authorization: Bearer YOUR_TOKEN" \
  --data '{
"compute": {
"accelerator": "gpu",
"instanceSize": "2xlarge",
"instanceType": "p4de",
"scaling": {
"maxReplica": 1,
"minReplica": 1
}
},
"model": {
"framework": "pytorch",
"image": {
"custom": {
"health_route": "/health",
"env": {
"MAX_BATCH_PREFILL_TOKENS": "2048",
"MAX_BATCH_TOTAL_TOKENS": "1024000",
"MAX_INPUT_LENGTH": "1024",
"MAX_TOTAL_TOKENS": "32000",
"MODEL_ID": "/repository"
},
"url": "ghcr.io/huggingface/text-generation-inference:sha-e4d31a4"
}
},
"repository": "mistralai/Mixtral-8x7B-Instruct-v0.1",
"task": "text-generation"
},
"name": "lastest-tgi-test",
"provider": {
"region": "us-east-1",
"vendor": "aws"
},
"type": "protected"
}' \
  https://api.endpoints.huggingface.cloud/v2/endpoint/drbh
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment