Skip to content

Instantly share code, notes, and snippets.

@tudoanh
Created April 22, 2024 02:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tudoanh/400765880f0eb8b9b45d3ed2ff96f086 to your computer and use it in GitHub Desktop.
Save tudoanh/400765880f0eb8b9b45d3ed2ff96f086 to your computer and use it in GitHub Desktop.
Run Llama 3 8B with Llamafile
import requests
import subprocess
import os
import signal
def download_file(url, target_path):
    """Stream the resource at *url* into the file *target_path*.

    The body is written in 8 KiB chunks so large files never have to fit
    in memory.

    Args:
        url: HTTP(S) address of the file to fetch.
        target_path: Local path to write; an existing file is overwritten.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    # The context manager returns the connection to the pool even when the
    # download fails half-way. The timeout bounds connection setup and each
    # individual read, not the whole transfer, so large files still work.
    with requests.get(url, stream=True, timeout=30) as response:
        # Fail before touching the disk: otherwise an HTML error page would
        # be saved under the name of the executable or the model.
        response.raise_for_status()
        with open(target_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
def setup_and_run():
    """Download llamafile plus a GGUF model and run the server until stopped.

    Both files are saved in the current working directory, the llamafile
    binary is marked executable, and the server is launched with
    ``-ngl 15 --port 8080 --host 0.0.0.0``. The call blocks until the server
    exits on its own or the user presses CTRL-C; in the latter case the
    server is asked to terminate and is force-killed after 5 seconds.
    """
    # Define file URLs and paths
    llama_exec_url = "https://github.com/Mozilla-Ocho/llamafile/releases/download/0.7.3/llamafile-0.7.3"
    llama_exec_path = "llamafile-0.7.3"
    model_url = "https://huggingface.co/QuantFactory/dolphin-2.9-llama3-8b-GGUF/resolve/main/dolphin-2.9-llama3-8b.Q4_0.gguf?download=true"
    model_path = "dolphin-2.9-llama3-8b.Q4_0.gguf"

    # Download llamafile executable
    print("Downloading llamafile executable...")
    download_file(llama_exec_url, llama_exec_path)

    # Download model file
    print("Downloading model file...")
    download_file(model_url, model_path)

    # Set the executable permission for llamafile
    os.chmod(llama_exec_path, 0o755)
    print("Permissions set: executable")

    # Prepare the command to run
    cmd = f"./{llama_exec_path} -m {model_path} -ngl 15 --port 8080 --host 0.0.0.0"
    print("Starting the server with command:")
    print(cmd)

    # NOTE(review): the shell is kept deliberately. llamafile binaries are
    # reportedly shell-launchable polyglot executables that a direct exec may
    # reject; confirm before switching to an argument list.
    #
    # start_new_session puts the shell and everything it spawns into their
    # own process group, so the whole group can be signalled below. Signalling
    # only process.pid would hit the shell and could leave the actual server
    # running and holding the port.
    process = subprocess.Popen(cmd, shell=True, start_new_session=True)
    print("Server running... Press CTRL-C to stop.")

    def signal_server(sig):
        # The child is a session leader, so its pid is also its group id.
        try:
            os.killpg(process.pid, sig)
        except ProcessLookupError:
            # The group vanished between the liveness check and the signal.
            pass

    try:
        # Wait for process to complete or user to kill it
        process.wait()
    except KeyboardInterrupt:
        # The child is in a different session, so the terminal's SIGINT only
        # reached this script; the shutdown below stops the server.
        print("CTRL-C received. Stopping the server...")
    finally:
        # Terminate the process if still running
        if process.poll() is None:
            signal_server(signal.SIGTERM)
            try:
                process.wait(timeout=5)
            except subprocess.TimeoutExpired:
                print("Forcing process termination...")
                signal_server(signal.SIGKILL)
                # Reap the killed process so it does not linger as a zombie.
                process.wait()
        print("Server stopped.")
# Script entry point: download and launch the server only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    setup_and_run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment