Skip to content

Instantly share code, notes, and snippets.

@chand1012
Created December 16, 2023 16:37
Show Gist options
  • Save chand1012/3479c98fd4e77cabc767f19c2d900439 to your computer and use it in GitHub Desktop.
Save chand1012/3479c98fd4e77cabc767f19c2d900439 to your computer and use it in GitHub Desktop.
from typing import Any, List, Mapping, Optional
# pip install httpx langchain
import httpx
from langchain.callbacks.manager import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM
class LlamaCppServer(LLM):
    """Use Llama.cpp's builtin server to remotely host an LLM for use with Langchain.

    Each call sends one blocking (non-streaming) POST request to the
    ``/completion`` endpoint under ``base_url`` and returns the generated
    text.
    """

    # Root URL of the llama.cpp server; "/completion" is appended per request.
    base_url: str

    @property
    def _llm_type(self) -> str:
        """Return the identifier string for this LLM implementation."""
        return "llama_cpp_server"

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Return the parameters that identify this LLM instance."""
        return {"base_url": self.base_url}

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Request a completion for ``prompt`` from the server.

        Args:
            prompt: Text sent as the ``prompt`` field of the request body.
            stop: Stop sequences sent as the ``stop`` field; an empty list
                is sent when this is ``None``.
            run_manager: Accepted for interface compatibility; not used here.
            **kwargs: Extra request-body fields. They override the default
                sampling parameters below, but never ``prompt`` or ``stop``.

        Returns:
            The ``content`` field of the JSON response, stripped of
            leading and trailing whitespace.

        Raises:
            httpx.HTTPStatusError: If the server answers with a 4xx/5xx status.
            KeyError: If the JSON response has no ``content`` field.
        """
        default_params = {
            "stream": False,
            "n_predict": 1000,
            "temperature": 0.7,
            "repeat_last_n": 256,
            "repeat_penalty": 1.18,
            "top_k": 40,
            "top_p": 0.5,
            "tfs_z": 1,
            "typical_p": 1,
            "presence_penalty": 0,
            "frequency_penalty": 0,
            "mirostat": 0,
            "mirostat_tau": 5,
            "mirostat_eta": 0.1,
            "grammar": "",
            "n_probs": 0,
            "image_data": [],
            "cache_prompt": False,
            "slot_id": -1,
        }
        # Later entries win: caller kwargs override the defaults, while the
        # explicit ``prompt`` and ``stop`` arguments override everything.
        req_body = {
            **default_params,
            **kwargs,
            "prompt": prompt,
            "stop": stop if stop is not None else [],
        }
        # Strip trailing slashes so a base_url such as "http://host:8080/"
        # does not yield a "//completion" path.
        url = self.base_url.rstrip("/") + "/completion"
        # timeout=None disables httpx's timeout, so this call waits for the
        # server for as long as the generation takes.
        resp = httpx.post(
            url,
            json=req_body,
            timeout=None,
            headers={"Content-Type": "application/json"},
        )
        resp.raise_for_status()
        data: str = resp.json()["content"]
        return data.strip()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment