Skip to content

Instantly share code, notes, and snippets.

@AGInfer
Last active February 21, 2024 20:22
Show Gist options
  • Save AGInfer/1ffe88623c0b903fb594425a966c46ae to your computer and use it in GitHub Desktop.
from vllm import LLM, SamplingParams
class InferlessPythonModel:
    """Inferless serving wrapper around a vLLM-hosted Mistral-7B AWQ model.

    Lifecycle: ``initialize`` loads the model once, ``infer`` handles each
    request, ``finalize`` runs at shutdown.
    """

    def initialize(self):
        """Load the quantized model and fix the decoding configuration."""
        # Decoding settings: moderately creative sampling, at most 256 new tokens.
        self.sampling_params = SamplingParams(
            temperature=0.7,
            top_p=0.95,
            max_tokens=256,
        )
        # 4-bit AWQ-quantized Mistral-7B checkpoint; 2048-token context window,
        # allowing vLLM to use up to 90% of GPU memory for weights + KV cache.
        self.llm = LLM(
            model="TheBloke/Mistral-7B-v0.1-AWQ",
            quantization="awq",
            dtype="float16",
            max_model_len=2048,
            gpu_memory_utilization=0.9,
        )

    def infer(self, inputs):
        """Generate a completion for ``inputs["prompt"]``.

        Returns a dict with the first generation's text under the key
        ``gresult`` (the platform's expected response shape).
        """
        prompt = inputs["prompt"]
        generations = self.llm.generate(prompt, self.sampling_params)
        # One RequestOutput per prompt; take the first candidate of each.
        texts = [gen.outputs[0].text for gen in generations]
        return {'gresult': texts[0]}

    def finalize(self):
        """Shutdown hook — nothing to release explicitly."""
        pass
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment