@sahil280114
Created December 19, 2023 10:39
human_eval.py
from human_eval.data import write_jsonl, read_problems
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Load all 164 HumanEval problems as a dict keyed by task_id.
problems = read_problems()

tokenizer = AutoTokenizer.from_pretrained("mistralai/Mixtral-8x7B-v0.1", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    "merged_thing",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="auto",
)
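
# Note: the tokenizer is loaded from the base Mixtral repo, while the weights
# come from "merged_thing", presumably a local merged fine-tune checkpoint.
# This assumes the merge left the base tokenizer/vocabulary unchanged.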

def fmt_prompt(prompt):
    # Wrap the raw HumanEval prompt in Mistral's [INST] instruction format.
    return f"<s> [INST] Complete this function, return the full function-\n {prompt} [/INST]"

def generate_one_completion(prompt, task_id):
    print("Running -", task_id)
    prompt_input = fmt_prompt(prompt=prompt)
    inputs = tokenizer(prompt_input, return_tensors="pt").to(model.device)
    # A low temperature keeps sampling near-greedy, which suits generating a
    # single completion per task.
    tokens = model.generate(
        **inputs,
        max_new_tokens=512,
        temperature=0.1,
        do_sample=True,
    )
    output_tokens = tokenizer.decode(tokens[0], skip_special_tokens=True, clean_up_tokenization_spaces=False)
    # Keep only the model's answer, i.e. everything after the closing [/INST] tag.
    out = output_tokens.split("[/INST]")[1].strip()
    # If the answer is wrapped in a markdown code fence, extract just the code body.
    if "```python" in out:
        out = out.split("```python")[1].strip().split("```")[0].strip()
    print(out)
    return out

num_samples_per_task = 1
samples = [
    dict(task_id=task_id, completion=generate_one_completion(problems[task_id]["prompt"], task_id))
    for task_id in problems
    for _ in range(num_samples_per_task)
]
write_jsonl("samples.jsonl", samples)
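
# To score the generated completions, run the evaluate_functional_correctness
# entry point that ships with the human-eval package
# (https://github.com/openai/human-eval):
#   evaluate_functional_correctness samples.jsonl
# It reports pass@k; with num_samples_per_task = 1 only pass@1 is meaningful.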