Last active
February 1, 2024 10:13
-
-
Save M0r13n/8fc99f408995241c8f9314cda340204f to your computer and use it in GitHub Desktop.
llama_index local model
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
from llama_index.llms import HuggingFaceLLM | |
from llama_index.prompts import PromptTemplate | |
# Hugging Face Hub id of the checkpoint; passed as both model and tokenizer name.
selected_model = 'mistralai/Mixtral-8x7B-Instruct-v0.1'

# Behavioural rules that are prepended to every query sent to the model.
SYSTEM_PROMPT = """You are an AI assistant that answers questions in a friendly manner, based on the given source documents. Here are some rules you always follow:
- Generate human readable output, avoid creating output with gibberish text.
- Generate only the requested output, don't include any other language before or after the requested output.
- Never say thank you, that you are happy to help, that you are an AI agent, etc. Just answer directly.
- Generate professional language typically used in business documents in North America.
- Never generate offensive or foul language.
"""

# Mixtral-Instruct expects "[INST] <instruction> [/INST]". The "<<SYS>>"
# markers are a Llama-2 chat convention that Mixtral's instruction format
# does not define, so the rules are placed at the start of the single
# instruction turn instead. "{query_str}" is the placeholder that
# llama_index fills with the user query.
query_wrapper_prompt = PromptTemplate(
    "[INST] " + SYSTEM_PROMPT + "\n{query_str} [/INST]"
)
# 8-bit quantization requires a CUDA device; without one the model load fails
# with "RuntimeError: No GPU found. A GPU is needed for quantization.".
# Only request quantization when a GPU is actually present.
if torch.cuda.is_available():
    # change these settings below depending on your GPU
    _model_kwargs = {"torch_dtype": torch.float16, "load_in_8bit": True}
else:
    # NOTE(review): unquantized float32 fallback so the script can still load
    # on CPU-only hosts; this needs a very large amount of RAM for a model
    # of this size -- confirm it is acceptable for your environment.
    _model_kwargs = {"torch_dtype": torch.float32}

# Local Hugging Face model wrapped for llama_index.
llm = HuggingFaceLLM(
    context_window=4096,
    max_new_tokens=2048,
    # Greedy decoding: with do_sample=False the temperature is not used, and
    # passing temperature=0.0 alongside it only triggers a transformers
    # warning, so it is omitted.
    generate_kwargs={"do_sample": False},
    query_wrapper_prompt=query_wrapper_prompt,
    tokenizer_name=selected_model,
    model_name=selected_model,
    # Let accelerate decide how to place the weights on the available devices.
    device_map="auto",
    model_kwargs=_model_kwargs,
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
accelerate==0.26.1 | |
aiohttp==3.9.3 | |
aiosignal==1.3.1 | |
annotated-types==0.6.0 | |
anyio==4.2.0 | |
async-timeout==4.0.3 | |
attrs==23.2.0 | |
certifi==2023.11.17 | |
charset-normalizer==3.3.2 | |
click==8.1.7 | |
dataclasses-json==0.6.3 | |
Deprecated==1.2.14 | |
dirtyjson==1.0.8 | |
distro==1.9.0 | |
exceptiongroup==1.2.0 | |
filelock==3.13.1 | |
frozenlist==1.4.1 | |
fsspec==2023.12.2 | |
greenlet==3.0.3 | |
h11==0.14.0 | |
httpcore==1.0.2 | |
httpx==0.26.0 | |
huggingface-hub==0.20.3 | |
idna==3.6 | |
Jinja2==3.1.3 | |
joblib==1.3.2 | |
llama-index==0.9.40 | |
MarkupSafe==2.1.4 | |
marshmallow==3.20.2 | |
mpmath==1.3.0 | |
multidict==6.0.4 | |
mypy-extensions==1.0.0 | |
nest-asyncio==1.6.0 | |
networkx==3.2.1 | |
nltk==3.8.1 | |
numpy==1.26.3 | |
nvidia-cublas-cu12==12.1.3.1 | |
nvidia-cuda-cupti-cu12==12.1.105 | |
nvidia-cuda-nvrtc-cu12==12.1.105 | |
nvidia-cuda-runtime-cu12==12.1.105 | |
nvidia-cudnn-cu12==8.9.2.26 | |
nvidia-cufft-cu12==11.0.2.54 | |
nvidia-curand-cu12==10.3.2.106 | |
nvidia-cusolver-cu12==11.4.5.107 | |
nvidia-cusparse-cu12==12.1.0.106 | |
nvidia-nccl-cu12==2.19.3 | |
nvidia-nvjitlink-cu12==12.3.101 | |
nvidia-nvtx-cu12==12.1.105 | |
openai==1.10.0 | |
packaging==23.2 | |
pandas==2.2.0 | |
psutil==5.9.8 | |
pydantic==2.6.0 | |
pydantic_core==2.16.1 | |
python-dateutil==2.8.2 | |
pytz==2023.4 | |
PyYAML==6.0.1 | |
regex==2023.12.25 | |
requests==2.31.0 | |
safetensors==0.4.2 | |
six==1.16.0 | |
sniffio==1.3.0 | |
SQLAlchemy==2.0.25 | |
sympy==1.12 | |
tenacity==8.2.3 | |
tiktoken==0.5.2 | |
tokenizers==0.15.1 | |
torch==2.2.0 | |
tqdm==4.66.1 | |
transformers==4.37.2 | |
triton==2.2.0 | |
typing-inspect==0.9.0 | |
typing_extensions==4.9.0 | |
tzdata==2023.4 | |
urllib3==2.2.0 | |
wrapt==1.16.0 | |
yarl==1.9.4 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
RuntimeError: No GPU found. A GPU is needed for quantization.