Skip to content

Instantly share code, notes, and snippets.

@AIAnytime
Created February 8, 2024 09:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save AIAnytime/be79b6a23a8ca5864604ce7f98c1574c to your computer and use it in GitHub Desktop.
Save AIAnytime/be79b6a23a8ca5864604ce7f98c1574c to your computer and use it in GitHub Desktop.
RunPod Code
import os

# Provide your RunPod key through the RUNPOD_API_KEY environment variable.
# setdefault() only installs the placeholder when the variable is not already
# set, so a real key exported in the shell is never overwritten.
os.environ.setdefault("RUNPOD_API_KEY", "your_runpod_api_key")

# Imported after the environment variable is in place, matching the original
# statement order of this script.
import runpod
from IPython.display import display, Markdown

runpod.api_key = os.getenv("RUNPOD_API_KEY", "your_runpod_api_key")

# The placeholder value means no real key was supplied: show a hint in the
# notebook and stop before any API call is attempted.
if runpod.api_key == "your_runpod_api_key":
    display(
        Markdown(
            "It appears that you don't have a RunPod API key. You can obtain one at [runpod.io](https://runpod.io?ref=s7508tca)"
        )
    )
    raise AssertionError("Missing RunPod API key")

# Show all available GPU types (rendered as the cell output in a notebook).
runpod.get_gpus()
# Create the pod; data_center_id is optional and can be enabled below.
# Decide which model you want to serve; here we use falcon-40b.

# Number of GPUs to rent. Defined once up front because it is used twice:
# as the pod's gpu_count and as the --num-shard value passed to the container,
# which keeps the two in agreement.
gpu_count = 2

pod = runpod.create_pod(
    name="Falcon-40B",
    image_name="ghcr.io/huggingface/text-generation-inference:0.8",
    gpu_type_id="NVIDIA A100 80GB PCIe",
    cloud_type="SECURE",
    # data_center_id="US-KS-1",
    docker_args=f"--model-id tiiuae/falcon-40b --num-shard {gpu_count}",
    gpu_count=gpu_count,
    volume_in_gb=195,
    container_disk_in_gb=5,
    ports="80/http,29500/http",
    volume_mount_path="/data",
)
# Build a LangChain LLM client that talks to the inference server on the pod.
from langchain.llms import HuggingFaceTextGenInference

# Generation settings forwarded unchanged to the client constructor.
_generation_settings = {
    "max_new_tokens": 100,
    "top_k": 10,
    "top_p": 0.95,
    "typical_p": 0.95,
    "temperature": 0.001,
    "repetition_penalty": 1.03,
}

# The pod's port 80 is reached through the RunPod proxy host built from its id.
inference_server_url = f'https://{pod["id"]}-80.proxy.runpod.net'

llm = HuggingFaceTextGenInference(
    inference_server_url=inference_server_url,
    **_generation_settings,
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment