@filmo
Last active July 15, 2023 22:49
mat1 and mat2 shapes cannot be multiplied (trying to fine-tune falcon-7b using https://huggingface.co/timdettmers/guanaco-33b)
0%| | 0/1000 [00:00<?, ?it/s]You're using a PreTrainedTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
Traceback (most recent call last):
  File "/home/philglau/PycharmProjects/tokenizersLLM/medium_article_falcon7b.py", line 87, in <module>
    trainer.train()
  File "/home/philglau/anaconda3/envs/pytorch_hug_llm_203/lib/python3.11/site-packages/transformers/trainer.py", line 1645, in train
    return inner_training_loop(
           ^^^^^^^^^^^^^^^^^^^^
  File "/home/philglau/anaconda3/envs/pytorch_hug_llm_203/lib/python3.11/site-packages/transformers/trainer.py", line 1938, in _inner_training_loop
    tr_loss_step = self.training_step(model, inputs)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/philglau/anaconda3/envs/pytorch_hug_llm_203/lib/python3.11/site-packages/transformers/trainer.py", line 2759, in training_step
    loss = self.compute_loss(model, inputs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/philglau/anaconda3/envs/pytorch_hug_llm_203/lib/python3.11/site-packages/transformers/trainer.py", line 2784, in compute_loss
    outputs = model(**inputs)
              ^^^^^^^^^^^^^^^
  File "/home/philglau/anaconda3/envs/pytorch_hug_llm_203/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/philglau/anaconda3/envs/pytorch_hug_llm_203/lib/python3.11/site-packages/torch/nn/parallel/data_parallel.py", line 171, in forward
    outputs = self.parallel_apply(replicas, inputs, kwargs)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/philglau/anaconda3/envs/pytorch_hug_llm_203/lib/python3.11/site-packages/torch/nn/parallel/data_parallel.py", line 181, in parallel_apply
    return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)])
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/philglau/anaconda3/envs/pytorch_hug_llm_203/lib/python3.11/site-packages/torch/nn/parallel/parallel_apply.py", line 89, in parallel_apply
    output.reraise()
  File "/home/philglau/anaconda3/envs/pytorch_hug_llm_203/lib/python3.11/site-packages/torch/_utils.py", line 644, in reraise
    raise exception
RuntimeError: Caught RuntimeError in replica 0 on device 0.
Original Traceback (most recent call last):
  File "/home/philglau/anaconda3/envs/pytorch_hug_llm_203/lib/python3.11/site-packages/torch/nn/parallel/parallel_apply.py", line 64, in _worker
    output = module(*input, **kwargs)
             ^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/philglau/anaconda3/envs/pytorch_hug_llm_203/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/philglau/anaconda3/envs/pytorch_hug_llm_203/lib/python3.11/site-packages/peft/peft_model.py", line 678, in forward
    return self.base_model(
           ^^^^^^^^^^^^^^^^
  File "/home/philglau/anaconda3/envs/pytorch_hug_llm_203/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/philglau/anaconda3/envs/pytorch_hug_llm_203/lib/python3.11/site-packages/accelerate/hooks.py", line 165, in new_forward
    output = old_forward(*args, **kwargs)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/philglau/.cache/huggingface/modules/transformers_modules/tiiuae/falcon-7b/378337427557d1df3e742264a2901a49f25d4eb1/modelling_RW.py", line 753, in forward
    transformer_outputs = self.transformer(
                          ^^^^^^^^^^^^^^^^^
  File "/home/philglau/anaconda3/envs/pytorch_hug_llm_203/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/philglau/anaconda3/envs/pytorch_hug_llm_203/lib/python3.11/site-packages/accelerate/hooks.py", line 165, in new_forward
    output = old_forward(*args, **kwargs)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/philglau/.cache/huggingface/modules/transformers_modules/tiiuae/falcon-7b/378337427557d1df3e742264a2901a49f25d4eb1/modelling_RW.py", line 648, in forward
    outputs = block(
              ^^^^^^
  File "/home/philglau/anaconda3/envs/pytorch_hug_llm_203/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/philglau/anaconda3/envs/pytorch_hug_llm_203/lib/python3.11/site-packages/accelerate/hooks.py", line 165, in new_forward
    output = old_forward(*args, **kwargs)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/philglau/.cache/huggingface/modules/transformers_modules/tiiuae/falcon-7b/378337427557d1df3e742264a2901a49f25d4eb1/modelling_RW.py", line 385, in forward
    attn_outputs = self.self_attention(
                   ^^^^^^^^^^^^^^^^^^^^
  File "/home/philglau/anaconda3/envs/pytorch_hug_llm_203/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/philglau/anaconda3/envs/pytorch_hug_llm_203/lib/python3.11/site-packages/accelerate/hooks.py", line 165, in new_forward
    output = old_forward(*args, **kwargs)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/philglau/.cache/huggingface/modules/transformers_modules/tiiuae/falcon-7b/378337427557d1df3e742264a2901a49f25d4eb1/modelling_RW.py", line 242, in forward
    fused_qkv = self.query_key_value(hidden_states) # [batch_size, seq_length, 3 x hidden_size]
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/philglau/anaconda3/envs/pytorch_hug_llm_203/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/philglau/anaconda3/envs/pytorch_hug_llm_203/lib/python3.11/site-packages/peft/tuners/lora.py", line 565, in forward
    result = F.linear(x, transpose(self.weight, self.fan_in_fan_out), bias=self.bias)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: mat1 and mat2 shapes cannot be multiplied (512x4544 and 1x10614784)
0%| | 0/1000 [00:01<?, ?it/s]
Process finished with exit code 1
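
A note on the tokenizer warning at the top of the log: with a fast tokenizer, a single `__call__` tokenizes and pads in one pass, which is what the warning recommends. A minimal sketch of the two patterns; the checkpoint name and the pad-token assignment here are assumptions, since the training script itself isn't shown:

    from transformers import AutoTokenizer

    tok = AutoTokenizer.from_pretrained("tiiuae/falcon-7b")
    tok.pad_token = tok.eos_token  # falcon's tokenizer ships without a pad token

    texts = ["hello world", "a longer example sentence"]

    # Preferred: tokenize and pad in one pass.
    batch = tok(texts, padding=True, return_tensors="pt")

    # The slower pattern the warning refers to: encode first, pad afterwards.
    encoded = [tok.encode(t) for t in texts]
    padded = tok.pad({"input_ids": encoded}, padding=True, return_tensors="pt")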
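
On the error itself: 10,614,784 = 4544 * 4672 / 2, which is consistent with falcon-7b's fused query_key_value weight (4544 inputs, 4672 outputs) stored as a bitsandbytes 4-bit buffer packing two values per byte. The stack also passes through torch/nn/parallel/data_parallel.py, and quantized weights are known to lose their logical shape when nn.DataParallel replicates the model across GPUs, which would hand F.linear a packed (N, 1) tensor. A hypothetical sketch that reproduces just the shape mismatch, not the gist's actual code:

    import torch
    import torch.nn.functional as F

    x = torch.randn(512, 4544)      # [batch*seq, hidden_size], as in the error
    w = torch.empty(10_614_784, 1)  # a packed quantized weight left in (N, 1) storage shape
    out = F.linear(x, w)            # RuntimeError: mat1 and mat2 shapes cannot be
                                    # multiplied (512x4544 and 1x10614784)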

filmo commented Jul 15, 2023

System: fresh Ubuntu 22.04 install as of 2023-07-12, NVIDIA driver 535.54.03.
PyTorch installed with CUDA 11.7 via conda.
All other required packages installed via pip inside the new PyTorch environment.
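
If the DataParallel replication noted above is indeed the culprit, one hedged workaround (untested here, an assumption rather than a confirmed fix) is to pin the run to a single visible GPU so the Trainer never wraps the model in nn.DataParallel:

    import os

    # Must be set before torch/transformers initialize CUDA.
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"

The same effect from the shell: CUDA_VISIBLE_DEVICES=0 python medium_article_falcon7b.py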
