Skip to content

Instantly share code, notes, and snippets.

View zucchini-nlp's full-sized avatar
🦄
To code or not to code, that is the question

Raushan Turganbay zucchini-nlp

🦄
To code or not to code, that is the question
View GitHub Profile
from transformers import AutoProcessor
models = [
"llava-hf/vip-llava-13b-hf",
"llava-hf/vip-llava-7b-hf",
"llava-hf/llava-1.5-7b-hf",
"llava-hf/llava-1.5-13b-hf",
"llava-hf/bakLlava-v1-hf",
"llava-hf/llava-v1.6-mistral-7b-hf",
"llava-hf/llava-v1.6-vicuna-7b-hf",
@zucchini-nlp
zucchini-nlp / update_blip.py
Last active June 10, 2024 07:54
Update BLIP-2 model for new version
# Load your model and processor, then run the following to update a BLIP-2 model.
# It will update the files in your repo by adding new args to the configs and resizing the embedding layer.
# Afterwards you'll be able to run BLIP-2 without warnings/errors.
# NOTE: `model` and `processor` are not defined in this snippet; they must already be loaded.

# Copy the number of query tokens from the model config onto the processor.
processor.num_query_tokens = model.config.num_query_tokens
model.resize_token_embeddings(processor.tokenizer.vocab_size, pad_to_multiple_of=64) # pad for efficient computation
# Store the tokenizer's vocab size as the image token index in the model config.
# NOTE(review): presumably this is meant to be the first id past the existing vocabulary — confirm.
model.config.image_token_index = processor.tokenizer.vocab_size
# Push the updated model and processor; replace "YOUR-REPO" with the target repo id.
model.push_to_hub("YOUR-REPO")
processor.push_to_hub("YOUR-REPO")
@zucchini-nlp
zucchini-nlp / benchmark_compile.py
Created May 15, 2024 09:35
A script to benchmark torch compiled models' generation quality
import os
import argparse
import torch
import torch._dynamo.config
import torch._inductor.config
from transformers import AutoModelForCausalLM, AutoTokenizer
os.environ["TOKENIZERS_PARALLELISM"] = "0"
@zucchini-nlp
zucchini-nlp / bechmark_latency_memory.py
Last active June 17, 2024 03:37
Script to benchmark the latency and memory consumption of different cache implementations
# tested on https://github.com/zucchini-nlp/transformers/tree/quant (commit_id 5f3046a)
import os
import argparse
from pathlib import Path
from time import perf_counter
import numpy as np
from matplotlib import pyplot as plt
@zucchini-nlp
zucchini-nlp / perplexity.py
Last active May 9, 2024 14:06
Calculate the perplexity of Llama with different cache implementations
"""
Adapted from https://github.com/mit-han-lab/streaming-llm
Note: Although this script measures latency, it is not optimized whatsoever!
The latency is only tracked to see the impact of speed over time.
Usage:
python benchmark/perplexity.py --experiment dynamicCacheInt4 --cache_implementation dynamic
python benchmark/perplexity.py --experiment quantCacheInt4 --cache_implementation quantized --nbits 2
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from datasets import load_dataset
from tqdm import tqdm
# Load the Llama-2 7B chat tokenizer and reuse the EOS token id as the pad token id.
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
tokenizer.pad_token_id = tokenizer.eos_token_id
# Load the matching causal LM in float16 with the "eager" attention implementation, then move it to "cuda:0".
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf", torch_dtype=torch.float16, attn_implementation="eager").to("cuda:0")
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@zucchini-nlp
zucchini-nlp / code_vulnerabilities_gpt.ipynb
Created November 6, 2023 11:59
CWE Vulnerability Detection with GPT: Streamline code vulnerability discovery within a set of CWE standards. This approach utilizes GPT prompting to provide effective and efficient identification of code vulnerabilities, contributing to improved security.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@zucchini-nlp
zucchini-nlp / bash_scaffolding_gpt4.py
Created October 31, 2023 07:53
GPT-4 Bash Scaffold: A Python script that uses GPT-4 to interact with users in a command-line interface, allowing the execution of Bash commands enclosed in <bash> tags.
import re
import time
import openai
import tiktoken
import subprocess
from getkey import getkey, key
# Encoding that tiktoken associates with the "gpt-4" model name.
tokenizer = tiktoken.encoding_for_model("gpt-4")
# Return the number of tokens that `x` encodes to with the tokenizer above.
get_tokens = lambda x: len(tokenizer.encode(x))
# Placeholder value: replace with a real OpenAI API key before running.
openai.api_key = "YOUR API KEY"
@zucchini-nlp
zucchini-nlp / xapian_wiki_index.py
Created October 18, 2023 13:48
Python script: Retrieve Wikipedia paragraphs, preprocess, and index with Xapian for efficient term-based searches.
import re
import json
import pickle
import requests
import xapian
import nltk
# Overwrites the DB if it exists. If you want to add to an existing DB, use xapian.DB_CREATE_OR_OPEN
db = xapian.WritableDatabase("./toy_db", xapian.DB_CREATE_OR_OVERWRITE)