Raushan Turganbay zucchini-nlp

## add_chat_templates_llava.py
from transformers import AutoProcessor

models = [
    "llava-hf/vip-llava-13b-hf",
    "llava-hf/vip-llava-7b-hf",
    "llava-hf/llava-1.5-7b-hf",
    "llava-hf/llava-1.5-13b-hf",
    "llava-hf/bakLlava-v1-hf",
    "llava-hf/llava-v1.6-mistral-7b-hf",
    "llava-hf/llava-v1.6-vicuna-7b-hf",

## update_blip.py
# Load your model and processor, then run the following to update the BLIP-2 model.
# It will update the files in your repo by adding new args to the configs and resizing the embedding layer.
# After that you'll be able to run BLIP-2 without warnings/errors.

processor.num_query_tokens = model.config.num_query_tokens
model.resize_token_embeddings(processor.tokenizer.vocab_size, pad_to_multiple_of=64) # pad for efficient computation
model.config.image_token_index = processor.tokenizer.vocab_size

model.push_to_hub("YOUR-REPO")
processor.push_to_hub("YOUR-REPO")

## benchmark_compile.py
import os
import argparse

import torch
import torch._dynamo.config
import torch._inductor.config
from transformers import AutoModelForCausalLM, AutoTokenizer

os.environ["TOKENIZERS_PARALLELISM"] = "0"

## bechmark_latency_memory.py
# tested on https://github.com/zucchini-nlp/transformers/tree/quant (commit_id 5f3046a)

import os
import argparse
from pathlib import Path
from time import perf_counter

import numpy as np
from matplotlib import pyplot as plt

## perplexity.py
"""
Adapted from https://github.com/mit-han-lab/streaming-llm

Note: Although this script measures latency, it is not optimized whatsoever!
The latency is only tracked to see the impact of speed over time.

Usage:

python benchmark/perplexity.py --experiment dynamicCacheInt4 --cache_implementation dynamic
python benchmark/perplexity.py --experiment quantCacheInt4 --cache_implementation quantized --nbits 2

## bench_lm.py
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from datasets import load_dataset
from tqdm import tqdm


tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
tokenizer.pad_token_id = tokenizer.eos_token_id
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf", torch_dtype=torch.float16, attn_implementation="eager").to("cuda:0")

## t5_soft_promp_tuning.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                zucchini-nlp
                / t5_soft_promp_tuning.ipynb
            
            
              Last active
              December 2, 2023 19:13
            
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## code_vulnerabilities_gpt.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                zucchini-nlp
                / code_vulnerabilities_gpt.ipynb
            
            
              Created
              November 6, 2023 11:59
            
              
                CWE Vulnerability Detection with GPT: Streamline code vulnerability discovery within a set of CWE standards. This approach utilizes GPT prompting to provide effective and efficient identification of code vulnerabilities, contributing to improved security.
              
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## bash_scaffolding_gpt4.py
import re
import time
import openai
import tiktoken
import subprocess
from getkey import getkey, key

tokenizer = tiktoken.encoding_for_model("gpt-4")
get_tokens = lambda x: len(tokenizer.encode(x))
openai.api_key = "YOUR API KEY"

## xapian_wiki_index.py
import re
import json
import pickle
import requests

import xapian
import nltk

# Overwrites the DB if it exists. If you want to add to an existing DB, use xapian.DB_CREATE_OR_OPEN
db = xapian.WritableDatabase("./toy_db", xapian.DB_CREATE_OR_OVERWRITE)
	from transformers import AutoProcessor

	models = [
	"llava-hf/vip-llava-13b-hf",
	"llava-hf/vip-llava-7b-hf",
	"llava-hf/llava-1.5-7b-hf",
	"llava-hf/llava-1.5-13b-hf",
	"llava-hf/bakLlava-v1-hf",
	"llava-hf/llava-v1.6-mistral-7b-hf",
	"llava-hf/llava-v1.6-vicuna-7b-hf",
	# Load your model and processor, then run the following to update the BLIP-2 model.
	# It will update the files in your repo by adding new args to the configs and resizing the embedding layer.
	# After that you'll be able to run BLIP-2 without warnings/errors.

	processor.num_query_tokens = model.config.num_query_tokens
	model.resize_token_embeddings(processor.tokenizer.vocab_size, pad_to_multiple_of=64) # pad for efficient computation
	model.config.image_token_index = processor.tokenizer.vocab_size

	model.push_to_hub("YOUR-REPO")
	processor.push_to_hub("YOUR-REPO")
	import os
	import argparse

	import torch
	import torch._dynamo.config
	import torch._inductor.config
	from transformers import AutoModelForCausalLM, AutoTokenizer

	os.environ["TOKENIZERS_PARALLELISM"] = "0"
	# tested on https://github.com/zucchini-nlp/transformers/tree/quant (commit_id 5f3046a)

	import os
	import argparse
	from pathlib import Path
	from time import perf_counter

	import numpy as np
	from matplotlib import pyplot as plt
	"""
	Adapted from https://github.com/mit-han-lab/streaming-llm

	Note: Although this script measures latency, it is not optimized whatsoever!
	The latency is only tracked to see the impact of speed over time.

	Usage:

	python benchmark/perplexity.py --experiment dynamicCacheInt4 --cache_implementation dynamic
	python benchmark/perplexity.py --experiment quantCacheInt4 --cache_implementation quantized --nbits 2
	import torch
	from transformers import AutoTokenizer, AutoModelForCausalLM
	from datasets import load_dataset
	from tqdm import tqdm


	tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
	tokenizer.pad_token_id = tokenizer.eos_token_id
	model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf", torch_dtype=torch.float16, attn_implementation="eager").to("cuda:0")
	import re
	import time
	import openai
	import tiktoken
	import subprocess
	from getkey import getkey, key

	tokenizer = tiktoken.encoding_for_model("gpt-4")
	get_tokens = lambda x: len(tokenizer.encode(x))
	openai.api_key = "YOUR API KEY"
	import re
	import json
	import pickle
	import requests

	import xapian
	import nltk

	# Overwrites the DB if it exists. If you want to add to an existing DB, use xapian.DB_CREATE_OR_OPEN
	db = xapian.WritableDatabase("./toy_db", xapian.DB_CREATE_OR_OVERWRITE)