Let's checkout the PR:
git fetch origin pull/625/head:dbrx
git switch dbrx
pip install -vvv --no-build-isolation -e .
Download the model:
#!/bin/sh
# log: print a printf-style formatted message to stderr.
# $1 is the format string; remaining args are substituted into it.
log() {
	format="$1"; shift
	# shellcheck disable=SC2059 -- format string is intentionally a variable
	>&2 printf "$format\n" "$@"
}
usage() { | |
>&2 cat <<EOF |
# Build a Llama-2-70B model skeleton (empty weights, no RAM cost) with an
# extended context window, ready for load_checkpoint_and_dispatch.
# Tokenizer is taken from the 7B repo (same vocab across Llama-2 sizes —
# NOTE(review): confirm tokenizer/model vocab match is intended).
from transformers import LlamaConfig as LC, LlamaForCausalLM as LLM, LlamaTokenizer as LT
from accelerate import init_empty_weights, load_checkpoint_and_dispatch
import torch

lt = LT.from_pretrained("NousResearch/Llama-2-7b-hf")
c = LC.from_pretrained("NousResearch/Llama-2-70b-hf")
c.max_position_embeddings = 32764  # extend max context length
c.rope_theta = 1000000  # presumably RoPE base raised for long context — verify
with init_empty_weights():
    # Instantiate on the meta device: parameters are allocated lazily,
    # real weights arrive later via load_checkpoint_and_dispatch.
    m = LLM(c)
m = m.half().eval()       # fp16, inference mode
m.requires_grad_(False)   # freeze: no gradients needed
Let's checkout the PR:
git fetch origin pull/625/head:dbrx
git switch dbrx
pip install -vvv --no-build-isolation -e .
Download the model:
File "/root/miniconda3/envs/py3.10/lib/python3.10/site-packages/trl/trainer/utils.py", line 338, in __call__
    to_pad = [torch.LongTensor(ex[k]) for ex in features]
File "/root/miniconda3/envs/py3.10/lib/python3.10/site-packages/trl/trainer/utils.py", line 338, in <listcomp>
    to_pad = [torch.LongTensor(ex[k]) for ex in features]
File "/root/miniconda3/envs/py3.10/lib/python3.10/site-packages/trl/trainer/utils.py", line 338, in <listcomp>
    to_pad = [torch.LongTensor(ex[k]) for ex in features]
TypeError: 'NoneType' object cannot be interpreted as an integer
    to_pad = [torch.LongTensor(ex[k]) for ex in features]
TypeError: 'NoneType' object cannot be interpreted as an integer
    return inner_training_loop(