Created
September 29, 2024 03:32
-
-
Save samos123/ee858936496e1d314785719f9287230a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
WARNING 09-28 20:31:17 preprocess.py:86] Falling back on <BOS> for decoder start token id because decoder start token id is not available.
ERROR 09-28 20:31:18 async_llm_engine.py:61] Engine background task failed
ERROR 09-28 20:31:18 async_llm_engine.py:61] Traceback (most recent call last):
ERROR 09-28 20:31:18 async_llm_engine.py:61]   File "/usr/local/lib/python3.12/dist-packages/vllm/engine/async_llm_engine.py", line 51, in _log_task_completion
ERROR 09-28 20:31:18 async_llm_engine.py:61]     return_value = task.result()
ERROR 09-28 20:31:18 async_llm_engine.py:61]                    ^^^^^^^^^^^^^
ERROR 09-28 20:31:18 async_llm_engine.py:61]   File "/usr/local/lib/python3.12/dist-packages/vllm/engine/async_llm_engine.py", line 755, in run_engine_loop
ERROR 09-28 20:31:18 async_llm_engine.py:61]     result = task.result()
ERROR 09-28 20:31:18 async_llm_engine.py:61]              ^^^^^^^^^^^^^
ERROR 09-28 20:31:18 async_llm_engine.py:61]   File "/usr/local/lib/python3.12/dist-packages/vllm/engine/async_llm_engine.py", line 678, in engine_step
ERROR 09-28 20:31:18 async_llm_engine.py:61]     request_outputs = await self.engine.step_async(virtual_engine)
ERROR 09-28 20:31:18 async_llm_engine.py:61]                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 09-28 20:31:18 async_llm_engine.py:61]   File "/usr/local/lib/python3.12/dist-packages/vllm/engine/async_llm_engine.py", line 343, in step_async
ERROR 09-28 20:31:18 async_llm_engine.py:61]     outputs = await self.model_executor.execute_model_async(
ERROR 09-28 20:31:18 async_llm_engine.py:61]               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 09-28 20:31:18 async_llm_engine.py:61]   File "/usr/local/lib/python3.12/dist-packages/vllm/executor/gpu_executor.py", line 185, in execute_model_async
ERROR 09-28 20:31:18 async_llm_engine.py:61]     output = await make_async(self.driver_worker.execute_model
ERROR 09-28 20:31:18 async_llm_engine.py:61]              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 09-28 20:31:18 async_llm_engine.py:61]   File "/usr/lib/python3.12/concurrent/futures/thread.py", line 58, in run
ERROR 09-28 20:31:18 async_llm_engine.py:61]     result = self.fn(*self.args, **self.kwargs)
ERROR 09-28 20:31:18 async_llm_engine.py:61]              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 09-28 20:31:18 async_llm_engine.py:61]   File "/usr/local/lib/python3.12/dist-packages/vllm/worker/worker_base.py", line 327, in execute_model
ERROR 09-28 20:31:18 async_llm_engine.py:61]     output = self.model_runner.execute_model(
ERROR 09-28 20:31:18 async_llm_engine.py:61]              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 09-28 20:31:18 async_llm_engine.py:61]   File "/usr/local/lib/python3.12/dist-packages/torch/utils/_contextlib.py", line 116, in decorate_context
ERROR 09-28 20:31:18 async_llm_engine.py:61]     return func(*args, **kwargs)
ERROR 09-28 20:31:18 async_llm_engine.py:61]            ^^^^^^^^^^^^^^^^^^^^^
ERROR 09-28 20:31:18 async_llm_engine.py:61]   File "/usr/local/lib/python3.12/dist-packages/vllm/worker/enc_dec_model_runner.py", line 201, in execute_model
ERROR 09-28 20:31:18 async_llm_engine.py:61]     hidden_or_intermediate_states = model_executable(
ERROR 09-28 20:31:18 async_llm_engine.py:61]                                     ^^^^^^^^^^^^^^^^^
ERROR 09-28 20:31:18 async_llm_engine.py:61]   File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl
ERROR 09-28 20:31:18 async_llm_engine.py:61]     return self._call_impl(*args, **kwargs)
ERROR 09-28 20:31:18 async_llm_engine.py:61]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 09-28 20:31:18 async_llm_engine.py:61]   File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1562, in _call_impl
ERROR 09-28 20:31:18 async_llm_engine.py:61]     return forward_call(*args, **kwargs)
ERROR 09-28 20:31:18 async_llm_engine.py:61]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 09-28 20:31:18 async_llm_engine.py:61]   File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/mllama.py", line 1100, in forward
ERROR 09-28 20:31:18 async_llm_engine.py:61]     outputs = self.language_model(
ERROR 09-28 20:31:18 async_llm_engine.py:61]               ^^^^^^^^^^^^^^^^^^^^
ERROR 09-28 20:31:18 async_llm_engine.py:61]   File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl
ERROR 09-28 20:31:18 async_llm_engine.py:61]     return self._call_impl(*args, **kwargs)
ERROR 09-28 20:31:18 async_llm_engine.py:61]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 09-28 20:31:18 async_llm_engine.py:61]   File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1562, in _call_impl
ERROR 09-28 20:31:18 async_llm_engine.py:61]     return forward_call(*args, **kwargs)
ERROR 09-28 20:31:18 async_llm_engine.py:61]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 09-28 20:31:18 async_llm_engine.py:61]   File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/mllama.py", line 877, in forward
ERROR 09-28 20:31:18 async_llm_engine.py:61]     hidden_states = self.model(
ERROR 09-28 20:31:18 async_llm_engine.py:61]                     ^^^^^^^^^^^
ERROR 09-28 20:31:18 async_llm_engine.py:61]   File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl
ERROR 09-28 20:31:18 async_llm_engine.py:61]     return self._call_impl(*args, **kwargs)
ERROR 09-28 20:31:18 async_llm_engine.py:61]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 09-28 20:31:18 async_llm_engine.py:61]   File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1562, in _call_impl
ERROR 09-28 20:31:18 async_llm_engine.py:61]     return forward_call(*args, **kwargs)
ERROR 09-28 20:31:18 async_llm_engine.py:61]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 09-28 20:31:18 async_llm_engine.py:61]   File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/mllama.py", line 819, in forward
ERROR 09-28 20:31:18 async_llm_engine.py:61]     hidden_states = decoder_layer(
ERROR 09-28 20:31:18 async_llm_engine.py:61]                     ^^^^^^^^^^^^^^
ERROR 09-28 20:31:18 async_llm_engine.py:61]   File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl
ERROR 09-28 20:31:18 async_llm_engine.py:61]     return self._call_impl(*args, **kwargs)
ERROR 09-28 20:31:18 async_llm_engine.py:61]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 09-28 20:31:18 async_llm_engine.py:61]   File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1562, in _call_impl
ERROR 09-28 20:31:18 async_llm_engine.py:61]     return forward_call(*args, **kwargs)
ERROR 09-28 20:31:18 async_llm_engine.py:61]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 09-28 20:31:18 async_llm_engine.py:61]   File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/mllama.py", line 751, in forward
ERROR 09-28 20:31:18 async_llm_engine.py:61]     hidden_states = self.cross_attn(
ERROR 09-28 20:31:18 async_llm_engine.py:61]                     ^^^^^^^^^^^^^^^^
ERROR 09-28 20:31:18 async_llm_engine.py:61]   File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl
ERROR 09-28 20:31:18 async_llm_engine.py:61]     return self._call_impl(*args, **kwargs)
ERROR 09-28 20:31:18 async_llm_engine.py:61]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 09-28 20:31:18 async_llm_engine.py:61]   File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1562, in _call_impl
ERROR 09-28 20:31:18 async_llm_engine.py:61]     return forward_call(*args, **kwargs)
ERROR 09-28 20:31:18 async_llm_engine.py:61]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 09-28 20:31:18 async_llm_engine.py:61]   File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/mllama.py", line 700, in forward
ERROR 09-28 20:31:18 async_llm_engine.py:61]     output = self.attn(q,
ERROR 09-28 20:31:18 async_llm_engine.py:61]              ^^^^^^^^^^^^
ERROR 09-28 20:31:18 async_llm_engine.py:61]   File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl
ERROR 09-28 20:31:18 async_llm_engine.py:61]     return self._call_impl(*args, **kwargs)
ERROR 09-28 20:31:18 async_llm_engine.py:61]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 09-28 20:31:18 async_llm_engine.py:61]   File "/usr/local/lib/python3.12/dist-packages/torch/nn/modules/module.py", line 1562, in _call_impl
ERROR 09-28 20:31:18 async_llm_engine.py:61]     return forward_call(*args, **kwargs)
ERROR 09-28 20:31:18 async_llm_engine.py:61]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 09-28 20:31:18 async_llm_engine.py:61]   File "/usr/local/lib/python3.12/dist-packages/vllm/attention/layer.py", line 98, in forward
ERROR 09-28 20:31:18 async_llm_engine.py:61]     return self.impl.forward(query,
ERROR 09-28 20:31:18 async_llm_engine.py:61]            ^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 09-28 20:31:18 async_llm_engine.py:61]   File "/usr/local/lib/python3.12/dist-packages/vllm/attention/backends/xformers.py", line 595, in forward
ERROR 09-28 20:31:18 async_llm_engine.py:61]     out = self._run_memory_efficient_xformers_forward(
ERROR 09-28 20:31:18 async_llm_engine.py:61]           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 09-28 20:31:18 async_llm_engine.py:61]   File "/usr/local/lib/python3.12/dist-packages/vllm/attention/backends/xformers.py", line 705, in _run_memory_efficient_xformers_forward
ERROR 09-28 20:31:18 async_llm_engine.py:61]     attn_bias = BlockDiagonalMask.from_seqlens(
ERROR 09-28 20:31:18 async_llm_engine.py:61]                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 09-28 20:31:18 async_llm_engine.py:61]   File "/usr/local/lib/python3.12/dist-packages/xformers/ops/fmha/attn_bias.py", line 726, in from_seqlens
ERROR 09-28 20:31:18 async_llm_engine.py:61]     q_seqinfo = _SeqLenInfo.from_seqlens(q_seqlen, device=device)
ERROR 09-28 20:31:18 async_llm_engine.py:61]                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 09-28 20:31:18 async_llm_engine.py:61]   File "/usr/local/lib/python3.12/dist-packages/xformers/ops/fmha/attn_bias.py", line 358, in from_seqlens
ERROR 09-28 20:31:18 async_llm_engine.py:61]     min_seqlen, max_seqlen, seqstart_py, seqstart = cls._get_seqstart(
ERROR 09-28 20:31:18 async_llm_engine.py:61]                                                     ^^^^^^^^^^^^^^^^^^
ERROR 09-28 20:31:18 async_llm_engine.py:61]   File "/usr/local/lib/python3.12/dist-packages/xformers/ops/fmha/attn_bias.py", line 346, in _get_seqstart
ERROR 09-28 20:31:18 async_llm_engine.py:61]     seqstart = torch.tensor(seqstart_py, dtype=torch.int32, device=device)
ERROR 09-28 20:31:18 async_llm_engine.py:61]                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 09-28 20:31:18 async_llm_engine.py:61] RuntimeError: CUDA error: an illegal memory access was encountered
ERROR 09-28 20:31:18 async_llm_engine.py:61] CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
ERROR 09-28 20:31:18 async_llm_engine.py:61] For debugging consider passing CUDA_LAUNCH_BLOCKING=1. [NOTE: final log line was truncated in the original paste; reconstructed from PyTorch's standard CUDA-error message — verify against the full log.]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment