Skip to content

Instantly share code, notes, and snippets.

@Helw150
Created May 23, 2024 00:06
Show Gist options
  • Save Helw150/08c2d2d218bc646927b3b3754ee1735d to your computer and use it in GitHub Desktop.
from time import sleep
from datasets import load_dataset
from huggingface_hub import InferenceClient
from ratelimit import limits, sleep_and_retry
from transformers import AutoTokenizer
# Source questions to distill: the human-spoken HeySQuAD training split.
dataset = load_dataset("yijingwu/HeySQuAD_human", split="train")
# Tokenizer is used only for its chat template (prompt formatting), not for tokenizing.
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
# Hosted Inference API client for the same model; generation happens server-side.
client = InferenceClient(model="meta-llama/Meta-Llama-3-8B-Instruct")
# Client-side rate limit: at most CALLS requests per RATE_LIMIT seconds,
# enforced by the ratelimit decorators on create_distill_data below.
CALLS = 240
RATE_LIMIT = 60
@sleep_and_retry
@limits(calls=CALLS, period=RATE_LIMIT)
def create_distill_data(ex):
    """Generate a teacher response for one dataset example.

    Formats the example's question with the model's chat template, sends it
    to the hosted Llama-3 endpoint, and stores the generated text under
    ex["response"]. Intended for use with datasets.Dataset.map.

    On an API error the call backs off 100 seconds and retries. The retry is
    a loop rather than the original recursive call, so a persistently failing
    endpoint cannot exhaust the recursion limit, and only Exception is caught
    so Ctrl-C (KeyboardInterrupt) still aborts the run.

    Args:
        ex: a dataset row (dict-like) with a "question" field.

    Returns:
        The same row with a "response" field added.
    """
    chat = [
        {"role": "user", "content": ex["question"]},
    ]
    prompt = tokenizer.apply_chat_template(chat, tokenize=False)
    while True:
        try:
            response = client.text_generation(prompt, max_new_tokens=24)
            break
        except Exception:
            # Rate-limit or transient server error: back off, then retry.
            sleep(100)
    ex["response"] = response
    return ex
# Process the dataset in shards, pushing each shard to the Hub as it
# finishes so a mid-run crash only loses the current shard.
length = len(dataset)
splits = 14
step_size = length // splits
for start in range(splits):
    begin = start * step_size
    # Clamp the last shard to the dataset length: with integer division the
    # original code silently dropped the final `length % splits` examples.
    end = length if start == splits - 1 else begin + step_size
    ds_processed = dataset.select(range(begin, end))
    ds_processed = ds_processed.map(create_distill_data)
    ds_processed.push_to_hub("WillHeld/HeySQuAD_distill", split="train." + str(start))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment