# Joao Gante (gante)

## llama2_compile.py
# `torch.compile` enabled Llama 2 🏎️
# Loads the TinyLlama chat checkpoint in float16 and replaces the model's
# forward pass with a compiled version of itself.
import time

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
model = AutoModelForCausalLM.from_pretrained(
    "TinyLlama/TinyLlama-1.1B-Chat-v1.0", device_map="auto", torch_dtype=torch.float16
)
# Only `forward` is wrapped, so the rest of the model object is used as-is.
# Per the torch.compile docs: `fullgraph=True` raises on graph breaks instead
# of silently falling back, and `mode="reduce-overhead"` targets per-call
# Python overhead.
model.forward = torch.compile(model.forward, fullgraph=True, mode="reduce-overhead")

## galactica_contrastive_search.py
from transformers import AutoTokenizer, OPTForCausalLM

# Tokenizer and model come from the same Galactica 1.3B checkpoint; device
# placement is delegated via `device_map="auto"`.
tokenizer = AutoTokenizer.from_pretrained("facebook/galactica-1.3b")
model = OPTForCausalLM.from_pretrained("facebook/galactica-1.3b", device_map="auto")

# Alternative prompts; exactly one `input_text` assignment is active at a time.
# input_text = "Question: How small is a human cell? Answer:"  # they should get the same short answers
input_text = "Question: What do Maxwell's equations represent? Answer:"  # better with repetitions
# input_text = "Question: Simplify the following Python code using math:```pythondef calc_sum(n):    i = 0    s = 0    while i <= n:        s += i        i += 1    return s```Answer:"  # better with early stop
# input_text = "Question: What technology will revolutionize language models? Answer:"
# Send the prompt to whatever device the model was placed on. A hard-coded
# "cuda" fails on hosts without CUDA and can disagree with `device_map="auto"`.
input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to(model.device)

## benchmark_whisper.py
# Import preamble of the benchmark script; the code that uses these names is
# not part of this excerpt.
import time
from datetime import timedelta
from functools import wraps
from tqdm import tqdm

# PyTorch imports and settings
import torch
from transformers.testing_utils import torch_device
# Global PyTorch switch: allow TF32 for CUDA matmuls. The trailing note says
# every framework in the comparison uses TF32.
torch.backends.cuda.matmul.allow_tf32 = True  # All frameworks using TF32

## pt_img_gen.py
# Setup for image generation from a Portuguese prompt: a diffusers pipeline
# class, a seq2seq translation model, and torch's autocast are imported here.
# Only the translation model is loaded within this excerpt.
from diffusers import StableDiffusionPipeline
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from torch import autocast

# Prompt in Portuguese; the trailing comment gives its English meaning.
PT_PROMPT = "Um gato com um chapéu, pintura a aguarelas"  # A cat with a hat, watercolor painting

# translation PT -> EN
# Tokenizer and model are loaded from the same checkpoint id so they match.
transl_model_id = "Narrativa/mbart-large-50-finetuned-opus-pt-en-translation"
tokenizer = AutoTokenizer.from_pretrained(transl_model_id)
text_model = AutoModelForSeq2SeqLM.from_pretrained(transl_model_id)

## generate_benchmark.py
# Import preamble of the generation benchmark; the code that uses these names
# is not part of this excerpt.
import os
import time
from datetime import timedelta
from functools import wraps, partial
from tqdm import tqdm

# JAX imports and settings
# The environment variable is assigned before `import jax`; keep that order.
# NOTE(review): presumably this stops JAX/XLA from preallocating GPU memory
# up front - confirm against the JAX GPU memory allocation docs.
os.environ["XLA_PYTHON_CLIENT_PREALLOCATE"] = "false"
import jax
	# `torch.compile` enabled Llama 2 🏎️
# NOTE(review): from here to the end, the five script excerpts above are
# repeated without their `## <file>.py` headers. This looks like an extraction
# artifact - confirm and drop the repeat. The stray leading tab that made every
# statement an IndentationError has been removed so the file parses.

# --- repeat: llama2_compile.py excerpt ---
import time

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
model = AutoModelForCausalLM.from_pretrained(
    "TinyLlama/TinyLlama-1.1B-Chat-v1.0", device_map="auto", torch_dtype=torch.float16
)
# Only `forward` is wrapped; `fullgraph=True` raises on graph breaks (per the
# torch.compile docs) instead of silently falling back.
model.forward = torch.compile(model.forward, fullgraph=True, mode="reduce-overhead")

# --- repeat: galactica_contrastive_search.py excerpt ---
from transformers import AutoTokenizer, OPTForCausalLM

tokenizer = AutoTokenizer.from_pretrained("facebook/galactica-1.3b")
model = OPTForCausalLM.from_pretrained("facebook/galactica-1.3b", device_map="auto")

# Alternative prompts; exactly one `input_text` assignment is active at a time.
# input_text = "Question: How small is a human cell? Answer:" # they should get the same short answers
input_text = "Question: What do Maxwell's equations represent? Answer:" # better with repetitions
# input_text = "Question: Simplify the following Python code using math:```pythondef calc_sum(n): i = 0 s = 0 while i <= n: s += i i += 1 return s```Answer:" # better with early stop
# input_text = "Question: What technology will revolutionize language models? Answer:"
# Send the prompt to the model's own device instead of a hard-coded "cuda",
# which fails on hosts without CUDA and can disagree with `device_map="auto"`.
input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to(model.device)

# --- repeat: benchmark_whisper.py excerpt ---
import time
from datetime import timedelta
from functools import wraps
from tqdm import tqdm

# PyTorch imports and settings
import torch
from transformers.testing_utils import torch_device
torch.backends.cuda.matmul.allow_tf32 = True # All frameworks using TF32

# --- repeat: pt_img_gen.py excerpt ---
from diffusers import StableDiffusionPipeline
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from torch import autocast

PT_PROMPT = "Um gato com um chapéu, pintura a aguarelas" # A cat with a hat, watercolor painting

# translation PT -> EN
transl_model_id = "Narrativa/mbart-large-50-finetuned-opus-pt-en-translation"
tokenizer = AutoTokenizer.from_pretrained(transl_model_id)
text_model = AutoModelForSeq2SeqLM.from_pretrained(transl_model_id)

# --- repeat: generate_benchmark.py excerpt ---
import os
import time
from datetime import timedelta
from functools import wraps, partial
from tqdm import tqdm

# JAX imports and settings
# The environment variable is assigned before `import jax`; keep that order.
os.environ["XLA_PYTHON_CLIENT_PREALLOCATE"] = "false"
import jax