@M0r13n
Last active February 1, 2024 10:13
llama_index local model
import torch
from llama_index.llms import HuggingFaceLLM
from llama_index.prompts import PromptTemplate

selected_model = 'mistralai/Mixtral-8x7B-Instruct-v0.1'

SYSTEM_PROMPT = """You are an AI assistant that answers questions in a friendly manner, based on the given source documents. Here are some rules you always follow:
- Generate human-readable output; avoid creating output with gibberish text.
- Generate only the requested output; don't include any other language before or after the requested output.
- Never say thank you, that you are happy to help, that you are an AI agent, etc. Just answer directly.
- Generate professional language typically used in business documents in North America.
- Never generate offensive or foul language.
"""

# Wrap every query in the [INST] ... [/INST] instruction format used by
# Mixtral/Llama-2 instruct models, with the system prompt in the <<SYS>> block.
query_wrapper_prompt = PromptTemplate(
    "[INST]<<SYS>>\n" + SYSTEM_PROMPT + "<</SYS>>\n\n{query_str}[/INST] "
)

llm = HuggingFaceLLM(
    context_window=4096,
    max_new_tokens=2048,
    # Deterministic decoding: greedy search, no sampling.
    generate_kwargs={"temperature": 0.0, "do_sample": False},
    query_wrapper_prompt=query_wrapper_prompt,
    tokenizer_name=selected_model,
    model_name=selected_model,
    device_map="auto",
    # Change these settings below depending on your GPU.
    model_kwargs={"torch_dtype": torch.float16, "load_in_8bit": True},
)
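
The llm object on its own only wraps the model; a hedged sketch of plugging it into a llama-index 0.9.x query pipeline follows (the ./data directory and the query string are placeholders, and embed_model="local" assumes sentence-transformers is installed):

from llama_index import ServiceContext, SimpleDirectoryReader, VectorStoreIndex

# Route all LLM calls through the local Mixtral instance; embeddings stay local too.
service_context = ServiceContext.from_defaults(llm=llm, embed_model="local")

# Index whatever documents live in ./data (placeholder path).
documents = SimpleDirectoryReader("./data").load_data()
index = VectorStoreIndex.from_documents(documents, service_context=service_context)

response = index.as_query_engine().query("What do the source documents say about X?")
print(response)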
Pinned dependencies (requirements.txt):

accelerate==0.26.1
aiohttp==3.9.3
aiosignal==1.3.1
annotated-types==0.6.0
anyio==4.2.0
async-timeout==4.0.3
attrs==23.2.0
certifi==2023.11.17
charset-normalizer==3.3.2
click==8.1.7
dataclasses-json==0.6.3
Deprecated==1.2.14
dirtyjson==1.0.8
distro==1.9.0
exceptiongroup==1.2.0
filelock==3.13.1
frozenlist==1.4.1
fsspec==2023.12.2
greenlet==3.0.3
h11==0.14.0
httpcore==1.0.2
httpx==0.26.0
huggingface-hub==0.20.3
idna==3.6
Jinja2==3.1.3
joblib==1.3.2
llama-index==0.9.40
MarkupSafe==2.1.4
marshmallow==3.20.2
mpmath==1.3.0
multidict==6.0.4
mypy-extensions==1.0.0
nest-asyncio==1.6.0
networkx==3.2.1
nltk==3.8.1
numpy==1.26.3
nvidia-cublas-cu12==12.1.3.1
nvidia-cuda-cupti-cu12==12.1.105
nvidia-cuda-nvrtc-cu12==12.1.105
nvidia-cuda-runtime-cu12==12.1.105
nvidia-cudnn-cu12==8.9.2.26
nvidia-cufft-cu12==11.0.2.54
nvidia-curand-cu12==10.3.2.106
nvidia-cusolver-cu12==11.4.5.107
nvidia-cusparse-cu12==12.1.0.106
nvidia-nccl-cu12==2.19.3
nvidia-nvjitlink-cu12==12.3.101
nvidia-nvtx-cu12==12.1.105
openai==1.10.0
packaging==23.2
pandas==2.2.0
psutil==5.9.8
pydantic==2.6.0
pydantic_core==2.16.1
python-dateutil==2.8.2
pytz==2023.4
PyYAML==6.0.1
regex==2023.12.25
requests==2.31.0
safetensors==0.4.2
six==1.16.0
sniffio==1.3.0
SQLAlchemy==2.0.25
sympy==1.12
tenacity==8.2.3
tiktoken==0.5.2
tokenizers==0.15.1
torch==2.2.0
tqdm==4.66.1
transformers==4.37.2
triton==2.2.0
typing-inspect==0.9.0
typing_extensions==4.9.0
tzdata==2023.4
urllib3==2.2.0
wrapt==1.16.0
yarl==1.9.4
M0r13n commented Feb 1, 2024

RuntimeError: No GPU found. A GPU is needed for quantization.
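
This error is raised by bitsandbytes when load_in_8bit=True is set on a machine without a CUDA GPU. A minimal sketch of a guard (assuming the intent is to fall back to an unquantized load on CPU-only hosts rather than to require a GPU):

import torch

# Only request 8-bit quantization when a CUDA device is actually present;
# bitsandbytes' 8-bit path has no CPU fallback and raises the error above otherwise.
model_kwargs = {"torch_dtype": torch.float16 if torch.cuda.is_available() else torch.float32}
if torch.cuda.is_available():
    model_kwargs["load_in_8bit"] = True  # 8-bit weights via bitsandbytes (GPU only)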
