-
-
Save BullyNextDoor/782391aea1c95883b516c1dca41d68fa to your computer and use it in GitHub Desktop.
# Softprompt ("prompt tuning") training script for KoboldAI, written in
# Colab-notebook style: the #@param / #@markdown comments are Colab form
# annotations and are tied to the assignment on the same line.
import prompt_tuner
import os
import termcolor
import numpy as np
# Fixed seed (1729) makes the run reproducible; quiet=True suppresses
# the trainer's progress output.
trainer = prompt_tuner.BasicTrainer(1729, quiet=True)
print(termcolor.colored("\n\nDone.\n\n", "green"), flush=True)
# Hugging Face checkpoint to tune against. The finished softprompt is only
# usable with the same model it was trained on (see `supported` at export).
trainer.data.ckpt_path = "EleutherAI/gpt-neo-1.3B" #@param ["EleutherAI/gpt-j-6B", "KoboldAI/fairseq-dense-13B", "EleutherAI/gpt-neo-2.7B", "EleutherAI/gpt-neo-1.3B", "facebook/opt-13b", "facebook/opt-6.7b"] {allow-input: true}
trainer.get_hf_checkpoint_metadata()
# Intermediate training state is checkpointed to this file (.mtjsp format).
trainer.data.save_file = "softprompt.mtjsp"
# Optional path to a plain-text file whose contents seed the softprompt.
# Leave empty to initialize the softprompt from random vocabulary samples.
trainingprompt = ""
if not trainingprompt:
    trainer.data.prompt_method = "vocab_sample"
    #@markdown In case you left the prompt blank you can specify here how many tokens your prompt takes up (Larger is more knowledge but takes up more space from the story context when you use your softprompt)
    trainer.data.soft_in_dim = 80 #@param ["20", "40", "60", "80"] {type:"raw", allow-input: true}
else:
    # Explicit encoding: the default is locale-dependent (e.g. cp1252 on
    # Windows), which would misread a UTF-8 seed prompt.
    with open(trainingprompt, encoding="utf-8") as f:
        initial_softprompt = f.read()
    tokenizer = trainer.get_tokenizer()
    if trainer.data.newlinemode == "s":  # Handle fairseq-style newlines if required
        initial_softprompt = initial_softprompt.replace("\n", "</s>")
    # max_length=2e9 is effectively "no limit"; truncation=True just keeps
    # the tokenizer from warning about long sequences.
    trainer.data.initial_softprompt = tokenizer.encode(
        initial_softprompt, max_length=int(2e9), truncation=True
    )
# Do this to generate an NPY file for your dataset if you haven't already done so
#@markdown You will need a dataset that contains your stories in plain text .txt files, adjust the location if necessary (Unicode not supported, unix line endings recommended).
dataset_path = "dataset/" #@param ["/content/drive/MyDrive/dataset/"] {allow-input: true}
# Tokenized dataset is cached here so repeat runs can skip tokenization.
output_file = "dataset.npy"
#@markdown For 13B adjust the batch size to 400, everything else can remain 2048
batch_size = 2048 #@param ["2048", "400"] {type:"raw", allow-input: true}
#@markdown For most use cases one epoch is enough, increase if you use a very small dataset.
epochs = 1#@param {type:"raw", allow-input: true}
print(termcolor.colored("\n\nTokenizing Dataset.\n\n", "green"), flush=True)
# This step requires the ftfy package (text-encoding fixer) to be installed.
trainer.tokenize_dataset(dataset_path, output_file, batch_size, epochs)
print(termcolor.colored("\n\nStarting Training.\n\n", "green"), flush=True)
dataset_file = output_file
trainer.data.dataset_file = dataset_file
#@markdown If you picked a small batch size increase this value accordingly (Suggested values are in the dropdown)
trainer.data.gradient_accumulation_steps = 16 #@param ["16", "64"] {type:"raw", allow-input: true}
# Set training hyperparameters here; see the demo notebook for explanation of
# what these mean
#@markdown Adjusting the learning rate affects how strongly the AI learns from the data, 3e-5 is a safe default. Only adjust if your softprompt breaks during training.
learning_rate = 3e-5 #@param ["3e-5"] {type:"raw", allow-input: true}
# NOTE(review): key semantics below (e.g. whether "warmup" is a step count or
# a fraction) are defined by prompt_tuner — confirm against its demo notebook.
trainer.data.stparams = {
    "lr": learning_rate,
    "max_grad_norm": 10.0,
    "weight_decay": 0.1,
    "warmup": 0.1,
    "end_lr_multiplier": 0.1,
    "save_every": 10,
}
# Now, begin training!
trainer.train()
print(termcolor.colored("\n\nSave Kobold.\n\n", "green"), flush=True)
# Export the finished softprompt in KoboldAI's .zip format.
output_file = "my_softprompt.zip"
name = "my_softprompt"
# Record the checkpoint this softprompt was trained on; it is only usable
# with that same model.
supported = trainer.data.ckpt_path
description = "softprompt test"
author = "BullyND"
trainer.export_to_kobold(output_file, name, author, supported, description)
# Also export in mkultra's JSON format for use with that toolkit.
output_file_name = "my_softprompt.json"
soft_prompt_name = name
soft_prompt_description = supported + " - " + description
trainer.export_to_mkultra(output_file_name, soft_prompt_name, soft_prompt_description)
#!/bin/bash
# Launch softprompt tuning inside the KoboldAI micromamba runtime,
# installing the runtime first if it is not present yet.
if [ ! -f "runtime/envs/koboldai/bin/python" ]; then
    ./install_requirements.sh cuda
fi
# "$@" (not unquoted $*) forwards the caller's arguments with their
# original quoting intact — $* would word-split paths containing spaces.
bin/micromamba run -r runtime -n koboldai python tune.py "$@"
Very dumb question: how would I run this on Windows?
This is a proof of concept only. You will almost certainly need to debug things to make it work for you. It will not work on Pygmalion without also modifying prompt_tuner.py to account for the slightly different file layout within the model zip.
This is a proof of concept only. You will almost certainly need to debug things to make it work for you. It will not work on Pygmalion without also modifying prompt_tuner.py to account for the slightly different file layout within the model zip.
Hello Bully, I'm new to computer science, so could you please provide a tutorial or a link of tutorial to make it work on the Pygmalion? Thank you.
This is a proof of concept only. You will almost certainly need to debug things to make it work for you. It will not work on Pygmalion without also modifying prompt_tuner.py to account for the slightly different file layout within the model zip.
You certainly will, since the code it uses relies on an old version of transformers.
transformers.utils.hub.BinaryIO no longer exists. This code will cause errors right away.
This is a proof of concept only. You will almost certainly need to debug things to make it work for you. It will not work on Pygmalion without also modifying prompt_tuner.py to account for the slightly different file layout within the model zip.
This is a proof of concept only. You will almost certainly need to debug things to make it work for you. It will not work on Pygmalion without also modifying prompt_tuner.py to account for the slightly different file layout within the model zip.
Hello Bully, I'm new to computer science, so could you please provide a tutorial or a link of tutorial to make it work on the Pygmalion? Thank you.
I would certainly not try anything to do with AI as my first computer science project. This is advanced stuff. You should become proficient in Python first, at the very least, and then try something simpler with Torch.
This is a proof of concept only. You will almost certainly need to debug things to make it work for you. It will not work on Pygmalion without also modifying prompt_tuner.py to account for the slightly different file layout within the model zip.