This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from transformers import AutoProcessor | |
models = [ | |
"llava-hf/vip-llava-13b-hf", | |
"llava-hf/vip-llava-7b-hf", | |
"llava-hf/llava-1.5-7b-hf", | |
"llava-hf/llava-1.5-13b-hf", | |
"llava-hf/bakLlava-v1-hf", | |
"llava-hf/llava-v1.6-mistral-7b-hf", | |
"llava-hf/llava-v1.6-vicuna-7b-hf", |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load your model and processor, then run the following to update your BLIP-2 model | |
# It will update the files in your repo by adding new args to the configs and resizing the embedding layer | |
# Then you'll be able to run BLIP-2 without warnings/errors | |
processor.num_query_tokens = model.config.num_query_tokens | |
model.resize_token_embeddings(processor.tokenizer.vocab_size, pad_to_multiple_of=64) # pad for efficient computation | |
model.config.image_token_index = processor.tokenizer.vocab_size | |
model.push_to_hub("YOUR-REPO") | |
processor.push_to_hub("YOUR-REPO") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import argparse | |
import torch | |
import torch._dynamo.config | |
import torch._inductor.config | |
from transformers import AutoModelForCausalLM, AutoTokenizer | |
os.environ["TOKENIZERS_PARALLELISM"] = "0" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# tested on https://github.com/zucchini-nlp/transformers/tree/quant (commit_id 5f3046a) | |
import os | |
import argparse | |
from pathlib import Path | |
from time import perf_counter | |
import numpy as np | |
from matplotlib import pyplot as plt |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Adapted from https://github.com/mit-han-lab/streaming-llm | |
Note: Although this script measures latency, it is not optimized whatsoever! | |
The latency is only tracked to see how speed changes over time.
Usage: | |
python benchmark/perplexity.py --experiment dynamicCacheInt4 --cache_implementation dynamic | |
python benchmark/perplexity.py --experiment quantCacheInt4 --cache_implementation quantized --nbits 2 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
from transformers import AutoTokenizer, AutoModelForCausalLM | |
from datasets import load_dataset | |
from tqdm import tqdm | |
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf") | |
tokenizer.pad_token_id = tokenizer.eos_token_id | |
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf", torch_dtype=torch.float16, attn_implementation="eager").to("cuda:0") |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import time | |
import openai | |
import tiktoken | |
import subprocess | |
from getkey import getkey, key | |
tokenizer = tiktoken.encoding_for_model("gpt-4") | |
get_tokens = lambda x: len(tokenizer.encode(x)) | |
openai.api_key = "YOUR API KEY" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import json | |
import pickle | |
import requests | |
import xapian | |
import nltk | |
# Overwrites the DB if it exists. If you want to add to an existing DB, use xapian.DB_CREATE_OR_OPEN | |
db = xapian.WritableDatabase("./toy_db", xapian.DB_CREATE_OR_OVERWRITE) |