# logging
import logging
logname = "log.txt"
logging.basicConfig(
    format='%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s',
    datefmt='%H:%M:%S',
    level=logging.INFO,
    handlers=[
        logging.FileHandler(logname),
        logging.StreamHandler()
    ])
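# Everything logged below is written both to log.txt and to the console.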
# imports
import sys
import torch
from peft import PeftModel
import transformers
from transformers import LlamaTokenizer, LlamaForCausalLM, GenerationConfig, pipeline, AutoModelForTokenClassification
from langchain.llms import HuggingFacePipeline
from langchain.llms.base import LLM
from llama_index import SimpleDirectoryReader, LangchainEmbedding, GPTListIndex, PromptHelper, LLMPredictor, ServiceContext
from typing import Optional, List, Mapping, Any
from langchain.agents import load_tools
from langchain.agents import initialize_agent
#base_model = "decapoda-research/llama-7b-hf"
base_model = "circulus/alpaca-7b"
#base_model = "decapoda-research/llama-13b-hf"
modelName = base_model
# lora_weights = "chansung/alpaca-lora-13b"
# lora_weights = "baruga/alpaca-lora-13b"
# lora_weights = "mattreid/alpaca-lora-13b"
# lora_weights = "Dogge/alpaca-lora-13b"
# lora_weights = "circulus/alpaca-lora-13b"
# lora_weights = "daviddmc/lpaca-lora-13b"
# lora_weights = "tloen/alpaca-lora-7b"
lora_weights = "circulus/alpaca-lora-7b"
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
try:
    if torch.backends.mps.is_available():
        device = "mps"
except:  # noqa: E722
    pass
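# Device preference: CUDA if present, then Apple MPS, otherwise CPU.
# The try/except covers torch builds that do not expose the mps backend.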
load_8bit = True
tokenizer = LlamaTokenizer.from_pretrained(base_model)
print(device)
if device == "cuda": | |
model = LlamaForCausalLM.from_pretrained( | |
base_model, | |
load_in_8bit=load_8bit, | |
torch_dtype=torch.float16, | |
device_map={'': 0}, | |
) | |
model = PeftModel.from_pretrained( | |
model, | |
lora_weights, | |
torch_dtype=torch.float16, | |
) | |
elif device == "mps": | |
model = LlamaForCausalLM.from_pretrained( | |
base_model, | |
device_map={"": device}, | |
torch_dtype=torch.float16, | |
) | |
model = PeftModel.from_pretrained( | |
model, | |
lora_weights, | |
device_map={"": device}, | |
torch_dtype=torch.float16, | |
) | |
else: | |
model = LlamaForCausalLM.from_pretrained( | |
base_model, device_map={"": device}, low_cpu_mem_usage=True | |
) | |
model = PeftModel.from_pretrained( | |
model, | |
lora_weights, | |
device_map={"": device}, | |
) | |
# unwind broken decapoda-research config | |
model.config.pad_token_id = tokenizer.pad_token_id = 0 # unk | |
model.config.bos_token_id = 1 | |
model.config.eos_token_id = 2 | |
if not load_8bit: | |
model.half() # seems to fix bugs for some users. | |
model.eval() | |
if torch.__version__ >= "2" and sys.platform != "win32": | |
model = torch.compile(model) | |
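# At this point the base LLaMA/Alpaca weights are loaded (8-bit on CUDA) and the
# LoRA adapter is applied on top via PEFT; torch.compile is only used on
# torch >= 2 outside Windows.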
#
# define prompt helper
# set maximum input size
max_input_size = 2048
# set number of output tokens
num_output = 200
# set maximum chunk overlap
max_chunk_overlap = 20
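# PromptHelper uses these limits to decide how document chunks are packed into
# the 2048-token LLaMA context while leaving room for num_output answer tokens.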
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=2000,
    temperature=0.1,
    num_beams=1,
    top_p=0.95,
    repetition_penalty=1.2
)
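# Low temperature keeps answers mostly deterministic; repetition_penalty > 1
# discourages loops; max_length caps prompt plus completion tokens.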
local_llm = HuggingFacePipeline(pipeline=pipe)
# tools = load_tools(["llm-math"], llm=local_llm)
# agent = initialize_agent(tools, llm=local_llm, agent="zero-shot-react-description", verbose=True)
prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)
llm_predictor = LLMPredictor(llm=local_llm)
service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor, prompt_helper=prompt_helper)
documents = SimpleDirectoryReader('./data').load_data()
index = GPTListIndex.from_documents(
    documents, service_context=service_context)
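# GPTListIndex keeps every chunk from ./data and walks through all of them at
# query time (no embedding retrieval), so answers are grounded in the local
# documents but queries get slower as the folder grows.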
template = """Below is an instruction that describes a task. Write a response that appropriately completes the request. | |
### Instruction: | |
{instruction} | |
Answer:""" | |
from langchain import PromptTemplate, LLMChain | |
prompt = PromptTemplate(template=template, input_variables=["instruction"]) | |
llm_chain = LLMChain(prompt=prompt, | |
llm=local_llm) | |
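# Example (commented out): query the LoRA model directly, without the index.
# print(llm_chain.run("What are alpacas?"))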
def evaluate(instruction):
    # llm_chain already wraps its input in the prompt template, so the raw
    # instruction goes to the chain; the manually formatted prompt is used
    # for the document index query.
    prompt_text = template.replace("{instruction}", instruction)
    #return index.query(prompt_text)
    #return agent.run(instruction)
    logging.info("___________Prompt:____________")
    logging.info(prompt_text)
    logging.info("_________Normal model____________")
    logging.info(llm_chain.run(instruction))
    logging.info("_________After Documents___________")
    logging.info(index.query(prompt_text))
    logging.info("______________________________")
evaluate("What do you think of Facebook's LLaMa?") | |
evaluate("How many people lives in Martos?") | |
evaluate("What is the capital of England?") | |
evaluate("What are alpacas? and how are they different from llamas?") | |
evaluate("how much is 213769*121239?") |