# logging
import logging

logname = "log.txt"
logging.basicConfig(
    format='%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s',
    datefmt='%H:%M:%S',
    level=logging.INFO,
    handlers=[
        logging.FileHandler(logname),
        logging.StreamHandler()
    ])
# imports
import sys
import torch
import transformers
from peft import PeftModel
from transformers import LlamaTokenizer, LlamaForCausalLM, GenerationConfig, pipeline, AutoModelForTokenClassification
from langchain.llms import HuggingFacePipeline
from langchain.llms.base import LLM
from llama_index import SimpleDirectoryReader, LangchainEmbedding, GPTListIndex, PromptHelper, LLMPredictor, ServiceContext
from typing import Optional, List, Mapping, Any
from langchain.agents import load_tools
from langchain.agents import initialize_agent
# Base model; alternatives are left commented out.
# base_model = "decapoda-research/llama-7b-hf"
# base_model = "decapoda-research/llama-13b-hf"
base_model = "circulus/alpaca-7b"
modelName = base_model
# LoRA adapter weights; alternatives are left commented out.
# lora_weights = "chansung/alpaca-lora-13b"
# lora_weights = "baruga/alpaca-lora-13b"
# lora_weights = "mattreid/alpaca-lora-13b"
# lora_weights = "Dogge/alpaca-lora-13b"
# lora_weights = "circulus/alpaca-lora-13b"
# lora_weights = "daviddmc/lpaca-lora-13b"
# lora_weights = "tloen/alpaca-lora-7b"
lora_weights = "circulus/alpaca-lora-7b"
# Pick the best available device: CUDA > MPS > CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
try:
    if torch.backends.mps.is_available():
        device = "mps"
except:  # noqa: E722
    pass
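# Note (added): load_in_8bit below relies on the bitsandbytes package and only takes
# effect on the CUDA branch; the MPS branch loads fp16 weights and the CPU branch
# falls back to full precision.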
load_8bit = True
tokenizer = LlamaTokenizer.from_pretrained(base_model)
print(device)
if device == "cuda":
    # GPU: load the base model in 8-bit, then merge in the LoRA adapter.
    model = LlamaForCausalLM.from_pretrained(
        base_model,
        load_in_8bit=load_8bit,
        torch_dtype=torch.float16,
        device_map={'': 0},
    )
    model = PeftModel.from_pretrained(
        model,
        lora_weights,
        torch_dtype=torch.float16,
    )
elif device == "mps":
    # Apple Silicon: load in fp16 on the MPS backend.
    model = LlamaForCausalLM.from_pretrained(
        base_model,
        device_map={"": device},
        torch_dtype=torch.float16,
    )
    model = PeftModel.from_pretrained(
        model,
        lora_weights,
        device_map={"": device},
        torch_dtype=torch.float16,
    )
else:
    # CPU fallback: full precision, low memory usage while loading.
    model = LlamaForCausalLM.from_pretrained(
        base_model, device_map={"": device}, low_cpu_mem_usage=True
    )
    model = PeftModel.from_pretrained(
        model,
        lora_weights,
        device_map={"": device},
    )
# unwind broken decapoda-research config
model.config.pad_token_id = tokenizer.pad_token_id = 0  # unk
model.config.bos_token_id = 1
model.config.eos_token_id = 2
if not load_8bit:
    model.half()  # seems to fix bugs for some users.
model.eval()
if torch.__version__ >= "2" and sys.platform != "win32":
    model = torch.compile(model)
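# Optional smoke test (sketch added here, not part of the original gist): uncomment
# to check that the merged base + LoRA model generates before wiring up LangChain
# and llama_index. GenerationConfig is already imported above.
# test_inputs = tokenizer("### Instruction:\nSay hello.\n\nAnswer:", return_tensors="pt").to(device)
# with torch.no_grad():
#     test_out = model.generate(**test_inputs, generation_config=GenerationConfig(max_new_tokens=32))
# logging.info(tokenizer.decode(test_out[0], skip_special_tokens=True))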
#
# define prompt helper
# set maximum input size
max_input_size = 2048
# set number of output tokens
num_output = 200
# set maximum chunk overlap
max_chunk_overlap = 20
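# These three values are handed to PromptHelper below: they bound the model's
# context window, reserve room for the generated answer, and set how much
# neighbouring chunks overlap when llama_index splits the files in ./data.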
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=2000,
    temperature=0.1,
    num_beams=1,
    top_p=0.95,
    repetition_penalty=1.2
)
local_llm = HuggingFacePipeline(pipeline=pipe)
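# local_llm wraps the Hugging Face pipeline as a LangChain LLM. If you want to test
# it in isolation, it can be called directly with a prompt string (example added
# here, not part of the original gist):
# logging.info(local_llm("What are alpacas?"))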
# tools = load_tools(["llm-math"], llm=local_llm)
# agent = initialize_agent(tools, llm=local_llm, agent="zero-shot-react-description", verbose=True)
prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)
llm_predictor = LLMPredictor(llm=local_llm)
service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor, prompt_helper=prompt_helper)
documents = SimpleDirectoryReader('./data').load_data()
index = GPTListIndex.from_documents(
    documents, service_context=service_context)
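# index.query() answers a prompt using the files loaded from ./data; a GPTListIndex
# feeds every document chunk to the model rather than doing similarity search.
# evaluate() below uses it as the "After Documents" comparison against the bare LLMChain.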
template = """Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Instruction:
{instruction}
Answer:"""
from langchain import PromptTemplate, LLMChain
prompt = PromptTemplate(template=template, input_variables=["instruction"])
llm_chain = LLMChain(prompt=prompt, llm=local_llm)
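# Note (added): the chain above owns the PromptTemplate, so evaluate() below passes
# the raw instruction to llm_chain.run() and only uses the manually formatted prompt
# for logging and for the document-index query.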
def evaluate(instruction):
    # Keep the raw instruction for the chain (it applies the template itself)
    # and build the formatted prompt for logging and the index query.
    prompt_text = template.replace("{instruction}", instruction)
    # return index.query(prompt_text)
    # return agent.run(instruction)
    logging.info("___________Prompt:____________")
    logging.info(prompt_text)
    logging.info("_________Normal model____________")
    logging.info(llm_chain.run(instruction))
    logging.info("_________After Documents___________")
    logging.info(index.query(prompt_text))
    logging.info("______________________________")
evaluate("What do you think of Facebook's LLaMa?")
evaluate("How many people live in Martos?")
evaluate("What is the capital of England?")
evaluate("What are alpacas, and how are they different from llamas?")
evaluate("How much is 213769*121239?")