python -m examples.models.llama2.export_llama --checkpoint $MODEL_PATH/consolidated.00.pth --params $MODEL_PATH/params.json -kv --use_sdpa_with_kv_cache -X -qmode 8da4w --group_size 128 -d fp32
Could not import fairseq2 modules.
INFO:root:Loading model with checkpoint=/Users/gchauhan/dev/llama-fast/checkpoints/meta-llama/Llama-2-7b/consolidated.00.pth, params=/Users/gchauhan/dev/llama-fast/checkpoints/meta-llama/Llama-2-7b/params.json, use_kv_cache=True, weight_type=WeightType.LLAMA
Traceback (most recent call last):
File "<frozen runpy>", line 198, in _run_module_as_main
File "<frozen runpy>", line 88, in _run_code
File "/Users/gchauhan/dev/executorch/examples/models/llama2/export_llama.py", line 30, in <module>
main() # pragma: no cover
^^^^^^
File "/Users/gchauhan/dev/executorch/examples/models/llama2/export_llama.py", line 26, in main
export_llama(modelname, args)
File "/Users/gchauhan/dev/executorch/examples/models/llama2/export_llama_lib.py", line 408, in export_llama
return _export_llama(modelname, args)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/gchauhan/dev/executorch/examples/models/llama2/export_llama_lib.py", line 529, in _export_llama
builder_exported_to_edge = _prepare_for_llama_export(
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/gchauhan/dev/executorch/examples/models/llama2/export_llama_lib.py", line 486, in _prepare_for_llama_export
load_llama_model(
File "/Users/gchauhan/dev/executorch/examples/models/llama2/builder.py", line 83, in load_llama_model
model, example_inputs, _ = EagerModelFactory.create_model(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/gchauhan/dev/executorch/examples/models/model_factory.py", line 44, in create_model
model = model_class(**kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/Users/gchauhan/dev/executorch/examples/models/llama2/model.py", line 139, in __init__
self.model_ = Transformer(model_args)
^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/miniconda3/envs/et/lib/python3.11/site-packages/executorch/examples/models/llama2/llama_transformer.py", line 418, in __init__
self.tok_embeddings = nn.Embedding(params.vocab_size, params.dim)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/miniconda3/envs/et/lib/python3.11/site-packages/torch/nn/modules/sparse.py", line 143, in __init__
self.weight = Parameter(torch.empty((num_embeddings, embedding_dim), **factory_kwargs),
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/miniconda3/envs/et/lib/python3.11/site-packages/torch/utils/_device.py", line 78, in __torch_function__
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
RuntimeError: Trying to create tensor with negative dimension -1: [-1, 4096]
Note: the RuntimeError above occurs because vocab_size in params.json is -1, so the token-embedding table is created with shape [-1, 4096]. After updating params.json with vocab_size = 32000 to work around this, the export fails with a different error (output not shown here).