Created
February 9, 2025 00:53
-
-
Save asim-adl/0130e3b796dd405144208b410e8e4e21 to your computer and use it in GitHub Desktop.
vLLM
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(myenv) root@vllm:/home/vllm# docker-compose up -d | |
[+] Running 5/5 | |
✔ vllm-deepseek Pulled 4.9s | |
✔ c29f5b76f736 Pull complete 2.8s | |
✔ 73c4bbda278d Pull complete 3.2s | |
✔ acc53c3e87ac Pull complete 4.7s | |
✔ ad3b14759e4f Pull complete 4.7s | |
[+] Running 2/2 | |
✔ Network vllm_default Created 0.2s | |
✔ Container vllm Started 0.5s | |
(myenv) root@vllm:/home/vllm# docker-compose logs -f | |
vllm | Requirement already satisfied: pip in /usr/local/lib/python3.11/site-packages (24.0) | |
vllm | Collecting pip | |
vllm | Downloading pip-25.0-py3-none-any.whl.metadata (3.7 kB) | |
vllm | Downloading pip-25.0-py3-none-any.whl (1.8 MB) | |
vllm | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.8/1.8 MB 19.4 MB/s eta 0:00:00 | |
vllm | Installing collected packages: pip | |
vllm | Attempting uninstall: pip | |
vllm | Found existing installation: pip 24.0 | |
vllm | Uninstalling pip-24.0: | |
vllm | Successfully uninstalled pip-24.0 | |
vllm | Successfully installed pip-25.0 | |
vllm | WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv | |
vllm | Looking in indexes: https://download.pytorch.org/whl/cpu | |
vllm | Collecting torch | |
vllm | Downloading https://download.pytorch.org/whl/cpu/torch-2.6.0%2Bcpu-cp311-cp311-linux_x86_64.whl.metadata (26 kB) | |
vllm | Collecting filelock (from torch) | |
vllm | Downloading https://download.pytorch.org/whl/filelock-3.13.1-py3-none-any.whl.metadata (2.8 kB) | |
vllm | Collecting typing-extensions>=4.10.0 (from torch) | |
vllm | Downloading https://download.pytorch.org/whl/typing_extensions-4.12.2-py3-none-any.whl.metadata (3.0 kB) | |
vllm | Collecting networkx (from torch) | |
vllm | Downloading https://download.pytorch.org/whl/networkx-3.3-py3-none-any.whl.metadata (5.1 kB) | |
vllm | Collecting jinja2 (from torch) | |
vllm | Downloading https://download.pytorch.org/whl/Jinja2-3.1.4-py3-none-any.whl.metadata (2.6 kB) | |
vllm | Collecting fsspec (from torch) | |
vllm | Downloading https://download.pytorch.org/whl/fsspec-2024.6.1-py3-none-any.whl.metadata (11 kB) | |
vllm | Collecting sympy==1.13.1 (from torch) | |
vllm | Downloading https://download.pytorch.org/whl/sympy-1.13.1-py3-none-any.whl (6.2 MB) | |
vllm | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 6.2/6.2 MB 72.7 MB/s eta 0:00:00 | |
vllm | Collecting mpmath<1.4,>=1.1.0 (from sympy==1.13.1->torch) | |
vllm | Downloading https://download.pytorch.org/whl/mpmath-1.3.0-py3-none-any.whl (536 kB) | |
vllm | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 536.2/536.2 kB 33.1 MB/s eta 0:00:00 | |
vllm | Collecting MarkupSafe>=2.0 (from jinja2->torch) | |
vllm | Downloading https://download.pytorch.org/whl/MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (28 kB) | |
vllm | Downloading https://download.pytorch.org/whl/cpu/torch-2.6.0%2Bcpu-cp311-cp311-linux_x86_64.whl (178.7 MB) | |
vllm | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 178.7/178.7 MB 150.1 MB/s eta 0:00:00 | |
vllm | Downloading https://download.pytorch.org/whl/typing_extensions-4.12.2-py3-none-any.whl (37 kB) | |
vllm | Downloading https://download.pytorch.org/whl/filelock-3.13.1-py3-none-any.whl (11 kB) | |
vllm | Downloading https://download.pytorch.org/whl/fsspec-2024.6.1-py3-none-any.whl (177 kB) | |
vllm | Downloading https://download.pytorch.org/whl/Jinja2-3.1.4-py3-none-any.whl (133 kB) | |
vllm | Downloading https://download.pytorch.org/whl/networkx-3.3-py3-none-any.whl (1.7 MB) | |
vllm | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.7/1.7 MB 128.0 MB/s eta 0:00:00 | |
vllm | Installing collected packages: mpmath, typing-extensions, sympy, networkx, MarkupSafe, fsspec, filelock, jinja2, torch | |
vllm | Successfully installed MarkupSafe-2.1.5 filelock-3.13.1 fsspec-2024.6.1 jinja2-3.1.4 mpmath-1.3.0 networkx-3.3 sympy-1.13.1 torch-2.6.0+cpu typing-extensions-4.12.2 | |
vllm | WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager, possibly rendering your system unusable. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv. Use the --root-user-action option if you know what you are doing and want to suppress this warning. | |
vllm | Collecting vllm | |
vllm | Downloading vllm-0.7.2-cp38-abi3-manylinux1_x86_64.whl.metadata (12 kB) | |
vllm | Collecting psutil (from vllm) | |
vllm | Downloading psutil-6.1.1-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (22 kB) | |
vllm | Collecting sentencepiece (from vllm) | |
vllm | Downloading sentencepiece-0.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.7 kB) | |
vllm | Collecting numpy<2.0.0 (from vllm) | |
vllm | Downloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB) | |
vllm | Collecting requests>=2.26.0 (from vllm) | |
vllm | Downloading requests-2.32.3-py3-none-any.whl.metadata (4.6 kB) | |
vllm | Collecting tqdm (from vllm) | |
vllm | Downloading tqdm-4.67.1-py3-none-any.whl.metadata (57 kB) | |
vllm | Collecting blake3 (from vllm) | |
vllm | Downloading blake3-1.0.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.2 kB) | |
vllm | Collecting py-cpuinfo (from vllm) | |
vllm | Downloading py_cpuinfo-9.0.0-py3-none-any.whl.metadata (794 bytes) | |
vllm | Collecting transformers>=4.48.2 (from vllm) | |
vllm | Downloading transformers-4.48.3-py3-none-any.whl.metadata (44 kB) | |
vllm | Collecting tokenizers>=0.19.1 (from vllm) | |
vllm | Downloading tokenizers-0.21.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB) | |
vllm | Collecting protobuf (from vllm) | |
vllm | Downloading protobuf-5.29.3-cp38-abi3-manylinux2014_x86_64.whl.metadata (592 bytes) | |
vllm | Collecting fastapi!=0.113.*,!=0.114.0,>=0.107.0 (from vllm) | |
vllm | Downloading fastapi-0.115.8-py3-none-any.whl.metadata (27 kB) | |
vllm | Collecting aiohttp (from vllm) | |
vllm | Downloading aiohttp-3.11.12-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.7 kB) | |
vllm | Collecting openai>=1.52.0 (from vllm) | |
vllm | Downloading openai-1.61.1-py3-none-any.whl.metadata (27 kB) | |
vllm | Collecting uvicorn[standard] (from vllm) | |
vllm | Downloading uvicorn-0.34.0-py3-none-any.whl.metadata (6.5 kB) | |
vllm | Collecting pydantic>=2.9 (from vllm) | |
vllm | Downloading pydantic-2.10.6-py3-none-any.whl.metadata (30 kB) | |
vllm | Collecting prometheus_client>=0.18.0 (from vllm) | |
vllm | Downloading prometheus_client-0.21.1-py3-none-any.whl.metadata (1.8 kB) | |
vllm | Collecting pillow (from vllm) | |
vllm | Downloading pillow-11.1.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (9.1 kB) | |
vllm | Collecting prometheus-fastapi-instrumentator>=7.0.0 (from vllm) | |
vllm | Downloading prometheus_fastapi_instrumentator-7.0.2-py3-none-any.whl.metadata (13 kB) | |
vllm | Collecting tiktoken>=0.6.0 (from vllm) | |
vllm | Downloading tiktoken-0.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB) | |
vllm | Collecting lm-format-enforcer<0.11,>=0.10.9 (from vllm) | |
vllm | Downloading lm_format_enforcer-0.10.9-py3-none-any.whl.metadata (17 kB) | |
vllm | Collecting outlines==0.1.11 (from vllm) | |
vllm | Downloading outlines-0.1.11-py3-none-any.whl.metadata (17 kB) | |
vllm | Collecting lark==1.2.2 (from vllm) | |
vllm | Downloading lark-1.2.2-py3-none-any.whl.metadata (1.8 kB) | |
vllm | Collecting xgrammar>=0.1.6 (from vllm) | |
vllm | Downloading xgrammar-0.1.11-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (2.0 kB) | |
vllm | Requirement already satisfied: typing_extensions>=4.10 in /usr/local/lib/python3.11/site-packages (from vllm) (4.12.2) | |
vllm | Collecting filelock>=3.16.1 (from vllm) | |
vllm | Downloading filelock-3.17.0-py3-none-any.whl.metadata (2.9 kB) | |
vllm | Collecting partial-json-parser (from vllm) | |
vllm | Downloading partial_json_parser-0.2.1.1.post5-py3-none-any.whl.metadata (6.1 kB) | |
vllm | Collecting pyzmq (from vllm) | |
vllm | Downloading pyzmq-26.2.1-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (6.2 kB) | |
vllm | Collecting msgspec (from vllm) | |
vllm | Downloading msgspec-0.19.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.9 kB) | |
vllm | Collecting gguf==0.10.0 (from vllm) | |
vllm | Downloading gguf-0.10.0-py3-none-any.whl.metadata (3.5 kB) | |
vllm | Collecting importlib_metadata (from vllm) | |
vllm | Downloading importlib_metadata-8.6.1-py3-none-any.whl.metadata (4.7 kB) | |
vllm | Collecting mistral_common>=1.5.0 (from mistral_common[opencv]>=1.5.0->vllm) | |
vllm | Downloading mistral_common-1.5.2-py3-none-any.whl.metadata (4.6 kB) | |
vllm | Collecting pyyaml (from vllm) | |
vllm | Downloading PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.1 kB) | |
vllm | Collecting einops (from vllm) | |
vllm | Downloading einops-0.8.0-py3-none-any.whl.metadata (12 kB) | |
vllm | Collecting compressed-tensors==0.9.1 (from vllm) | |
vllm | Downloading compressed_tensors-0.9.1-py3-none-any.whl.metadata (6.8 kB) | |
vllm | Collecting depyf==0.18.0 (from vllm) | |
vllm | Downloading depyf-0.18.0-py3-none-any.whl.metadata (7.1 kB) | |
vllm | Collecting cloudpickle (from vllm) | |
vllm | Downloading cloudpickle-3.1.1-py3-none-any.whl.metadata (7.1 kB) | |
vllm | Collecting ray>=2.9 (from ray[default]>=2.9->vllm) | |
vllm | Downloading ray-2.42.0-cp311-cp311-manylinux2014_x86_64.whl.metadata (18 kB) | |
vllm | Collecting nvidia-ml-py>=12.560.30 (from vllm) | |
vllm | Downloading nvidia_ml_py-12.570.86-py3-none-any.whl.metadata (8.7 kB) | |
vllm | Collecting torch==2.5.1 (from vllm) | |
vllm | Downloading torch-2.5.1-cp311-cp311-manylinux1_x86_64.whl.metadata (28 kB) | |
vllm | Collecting torchaudio==2.5.1 (from vllm) | |
vllm | Downloading torchaudio-2.5.1-cp311-cp311-manylinux1_x86_64.whl.metadata (6.4 kB) | |
vllm | Collecting torchvision==0.20.1 (from vllm) | |
vllm | Downloading torchvision-0.20.1-cp311-cp311-manylinux1_x86_64.whl.metadata (6.1 kB) | |
vllm | Collecting xformers==0.0.28.post3 (from vllm) | |
vllm | Downloading xformers-0.0.28.post3-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (1.0 kB) | |
vllm | Collecting astor (from depyf==0.18.0->vllm) | |
vllm | Downloading astor-0.8.1-py2.py3-none-any.whl.metadata (4.2 kB) | |
vllm | Collecting dill (from depyf==0.18.0->vllm) | |
vllm | Downloading dill-0.3.9-py3-none-any.whl.metadata (10 kB) | |
vllm | Collecting interegular (from outlines==0.1.11->vllm) | |
vllm | Downloading interegular-0.3.3-py37-none-any.whl.metadata (3.0 kB) | |
vllm | Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/site-packages (from outlines==0.1.11->vllm) (3.1.4) | |
vllm | Collecting nest_asyncio (from outlines==0.1.11->vllm) | |
vllm | Downloading nest_asyncio-1.6.0-py3-none-any.whl.metadata (2.8 kB) | |
vllm | Collecting diskcache (from outlines==0.1.11->vllm) | |
vllm | Downloading diskcache-5.6.3-py3-none-any.whl.metadata (20 kB) | |
vllm | Collecting referencing (from outlines==0.1.11->vllm) | |
vllm | Downloading referencing-0.36.2-py3-none-any.whl.metadata (2.8 kB) | |
vllm | Collecting jsonschema (from outlines==0.1.11->vllm) | |
vllm | Downloading jsonschema-4.23.0-py3-none-any.whl.metadata (7.9 kB) | |
vllm | Collecting pycountry (from outlines==0.1.11->vllm) | |
vllm | Downloading pycountry-24.6.1-py3-none-any.whl.metadata (12 kB) | |
vllm | Collecting airportsdata (from outlines==0.1.11->vllm) | |
vllm | Downloading airportsdata-20241001-py3-none-any.whl.metadata (8.9 kB) | |
vllm | Collecting outlines_core==0.1.26 (from outlines==0.1.11->vllm) | |
vllm | Downloading outlines_core-0.1.26-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB) | |
vllm | Requirement already satisfied: networkx in /usr/local/lib/python3.11/site-packages (from torch==2.5.1->vllm) (3.3) | |
vllm | Requirement already satisfied: fsspec in /usr/local/lib/python3.11/site-packages (from torch==2.5.1->vllm) (2024.6.1) | |
vllm | Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch==2.5.1->vllm) | |
vllm | Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB) | |
vllm | Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch==2.5.1->vllm) | |
vllm | Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB) | |
vllm | Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch==2.5.1->vllm) | |
vllm | Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB) | |
vllm | Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch==2.5.1->vllm) | |
vllm | Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB) | |
vllm | Collecting nvidia-cublas-cu12==12.4.5.8 (from torch==2.5.1->vllm) | |
vllm | Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB) | |
vllm | Collecting nvidia-cufft-cu12==11.2.1.3 (from torch==2.5.1->vllm) | |
vllm | Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB) | |
vllm | Collecting nvidia-curand-cu12==10.3.5.147 (from torch==2.5.1->vllm) | |
vllm | Downloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB) | |
vllm | Collecting nvidia-cusolver-cu12==11.6.1.9 (from torch==2.5.1->vllm) | |
vllm | Downloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB) | |
vllm | Collecting nvidia-cusparse-cu12==12.3.1.170 (from torch==2.5.1->vllm) | |
vllm | Downloading nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB) | |
vllm | Collecting nvidia-nccl-cu12==2.21.5 (from torch==2.5.1->vllm) | |
vllm | Downloading nvidia_nccl_cu12-2.21.5-py3-none-manylinux2014_x86_64.whl.metadata (1.8 kB) | |
vllm | Collecting nvidia-nvtx-cu12==12.4.127 (from torch==2.5.1->vllm) | |
vllm | Downloading nvidia_nvtx_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.7 kB) | |
vllm | Collecting nvidia-nvjitlink-cu12==12.4.127 (from torch==2.5.1->vllm) | |
vllm | Downloading nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB) | |
vllm | Collecting triton==3.1.0 (from torch==2.5.1->vllm) | |
vllm | Downloading triton-3.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.3 kB) | |
vllm | Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.11/site-packages (from torch==2.5.1->vllm) (1.13.1) | |
vllm | Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/site-packages (from sympy==1.13.1->torch==2.5.1->vllm) (1.3.0) | |
vllm | Collecting starlette<0.46.0,>=0.40.0 (from fastapi!=0.113.*,!=0.114.0,>=0.107.0->vllm) | |
vllm | Downloading starlette-0.45.3-py3-none-any.whl.metadata (6.3 kB) | |
vllm | Collecting packaging (from lm-format-enforcer<0.11,>=0.10.9->vllm) | |
vllm | Downloading packaging-24.2-py3-none-any.whl.metadata (3.2 kB) | |
vllm | Collecting pillow (from vllm) | |
vllm | Downloading pillow-10.4.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (9.2 kB) | |
vllm | Collecting tiktoken>=0.6.0 (from vllm) | |
vllm | Downloading tiktoken-0.7.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB) | |
vllm | Collecting opencv-python-headless<5.0.0,>=4.0.0 (from mistral_common[opencv]>=1.5.0->vllm) | |
vllm | Downloading opencv_python_headless-4.11.0.86-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB) | |
vllm | Collecting anyio<5,>=3.5.0 (from openai>=1.52.0->vllm) | |
vllm | Downloading anyio-4.8.0-py3-none-any.whl.metadata (4.6 kB) | |
vllm | Collecting distro<2,>=1.7.0 (from openai>=1.52.0->vllm) | |
vllm | Downloading distro-1.9.0-py3-none-any.whl.metadata (6.8 kB) | |
vllm | Collecting httpx<1,>=0.23.0 (from openai>=1.52.0->vllm) | |
vllm | Downloading httpx-0.28.1-py3-none-any.whl.metadata (7.1 kB) | |
vllm | Collecting jiter<1,>=0.4.0 (from openai>=1.52.0->vllm) | |
vllm | Downloading jiter-0.8.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.2 kB) | |
vllm | Collecting sniffio (from openai>=1.52.0->vllm) | |
vllm | Downloading sniffio-1.3.1-py3-none-any.whl.metadata (3.9 kB) | |
vllm | Collecting annotated-types>=0.6.0 (from pydantic>=2.9->vllm) | |
vllm | Downloading annotated_types-0.7.0-py3-none-any.whl.metadata (15 kB) | |
vllm | Collecting pydantic-core==2.27.2 (from pydantic>=2.9->vllm) | |
vllm | Downloading pydantic_core-2.27.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB) | |
vllm | Collecting click>=7.0 (from ray>=2.9->ray[default]>=2.9->vllm) | |
vllm | Downloading click-8.1.8-py3-none-any.whl.metadata (2.3 kB) | |
vllm | Collecting msgpack<2.0.0,>=1.0.0 (from ray>=2.9->ray[default]>=2.9->vllm) | |
vllm | Downloading msgpack-1.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.4 kB) | |
vllm | Collecting aiosignal (from ray>=2.9->ray[default]>=2.9->vllm) | |
vllm | Downloading aiosignal-1.3.2-py2.py3-none-any.whl.metadata (3.8 kB) | |
vllm | Collecting frozenlist (from ray>=2.9->ray[default]>=2.9->vllm) | |
vllm | Downloading frozenlist-1.5.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB) | |
vllm | Collecting aiohttp-cors (from ray[default]>=2.9->vllm) | |
vllm | Downloading aiohttp_cors-0.7.0-py3-none-any.whl.metadata (20 kB) | |
vllm | Collecting colorful (from ray[default]>=2.9->vllm) | |
vllm | Downloading colorful-0.5.6-py2.py3-none-any.whl.metadata (16 kB) | |
vllm | Collecting opencensus (from ray[default]>=2.9->vllm) | |
vllm | Downloading opencensus-0.11.4-py2.py3-none-any.whl.metadata (12 kB) | |
vllm | Collecting smart-open (from ray[default]>=2.9->vllm) | |
vllm | Downloading smart_open-7.1.0-py3-none-any.whl.metadata (24 kB) | |
vllm | Collecting virtualenv!=20.21.1,>=20.0.24 (from ray[default]>=2.9->vllm) | |
vllm | Downloading virtualenv-20.29.1-py3-none-any.whl.metadata (4.5 kB) | |
vllm | Collecting py-spy>=0.2.0 (from ray[default]>=2.9->vllm) | |
vllm | Downloading py_spy-0.4.0-py2.py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.whl.metadata (16 kB) | |
vllm | Collecting grpcio>=1.42.0 (from ray[default]>=2.9->vllm) | |
vllm | Downloading grpcio-1.70.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.9 kB) | |
vllm | Collecting aiohappyeyeballs>=2.3.0 (from aiohttp->vllm) | |
vllm | Downloading aiohappyeyeballs-2.4.6-py3-none-any.whl.metadata (5.9 kB) | |
vllm | Collecting attrs>=17.3.0 (from aiohttp->vllm) | |
vllm | Downloading attrs-25.1.0-py3-none-any.whl.metadata (10 kB) | |
vllm | Collecting multidict<7.0,>=4.5 (from aiohttp->vllm) | |
vllm | Downloading multidict-6.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.0 kB) | |
vllm | Collecting propcache>=0.2.0 (from aiohttp->vllm) | |
vllm | Downloading propcache-0.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.2 kB) | |
vllm | Collecting yarl<2.0,>=1.17.0 (from aiohttp->vllm) | |
vllm | Downloading yarl-1.18.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (69 kB) | |
vllm | Collecting charset-normalizer<4,>=2 (from requests>=2.26.0->vllm) | |
vllm | Downloading charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (35 kB) | |
vllm | Collecting idna<4,>=2.5 (from requests>=2.26.0->vllm) | |
vllm | Downloading idna-3.10-py3-none-any.whl.metadata (10 kB) | |
vllm | Collecting urllib3<3,>=1.21.1 (from requests>=2.26.0->vllm) | |
vllm | Downloading urllib3-2.3.0-py3-none-any.whl.metadata (6.5 kB) | |
vllm | Collecting certifi>=2017.4.17 (from requests>=2.26.0->vllm) | |
vllm | Downloading certifi-2025.1.31-py3-none-any.whl.metadata (2.5 kB) | |
vllm | Collecting regex>=2022.1.18 (from tiktoken>=0.6.0->vllm) | |
vllm | Downloading regex-2024.11.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (40 kB) | |
vllm | Collecting huggingface-hub<1.0,>=0.16.4 (from tokenizers>=0.19.1->vllm) | |
vllm | Downloading huggingface_hub-0.28.1-py3-none-any.whl.metadata (13 kB) | |
vllm | Collecting safetensors>=0.4.1 (from transformers>=4.48.2->vllm) | |
vllm | Downloading safetensors-0.5.2-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB) | |
vllm | Collecting pybind11 (from xgrammar>=0.1.6->vllm) | |
vllm | Downloading pybind11-2.13.6-py3-none-any.whl.metadata (9.5 kB) | |
vllm | Collecting pytest (from xgrammar>=0.1.6->vllm) | |
vllm | Downloading pytest-8.3.4-py3-none-any.whl.metadata (7.5 kB) | |
vllm | Collecting zipp>=3.20 (from importlib_metadata->vllm) | |
vllm | Downloading zipp-3.21.0-py3-none-any.whl.metadata (3.7 kB) | |
vllm | Collecting h11>=0.8 (from uvicorn[standard]->vllm) | |
vllm | Downloading h11-0.14.0-py3-none-any.whl.metadata (8.2 kB) | |
vllm | Collecting httptools>=0.6.3 (from uvicorn[standard]->vllm) | |
vllm | Downloading httptools-0.6.4-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.6 kB) | |
vllm | Collecting python-dotenv>=0.13 (from uvicorn[standard]->vllm) | |
vllm | Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB) | |
vllm | Collecting uvloop!=0.15.0,!=0.15.1,>=0.14.0 (from uvicorn[standard]->vllm) | |
vllm | Downloading uvloop-0.21.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB) | |
vllm | Collecting watchfiles>=0.13 (from uvicorn[standard]->vllm) | |
vllm | Downloading watchfiles-1.0.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB) | |
vllm | Collecting websockets>=10.4 (from uvicorn[standard]->vllm) | |
vllm | Downloading websockets-14.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.8 kB) | |
vllm | Collecting httpcore==1.* (from httpx<1,>=0.23.0->openai>=1.52.0->vllm) | |
vllm | Downloading httpcore-1.0.7-py3-none-any.whl.metadata (21 kB) | |
vllm | Collecting jsonschema-specifications>=2023.03.6 (from jsonschema->outlines==0.1.11->vllm) | |
vllm | Downloading jsonschema_specifications-2024.10.1-py3-none-any.whl.metadata (3.0 kB) | |
vllm | Collecting rpds-py>=0.7.1 (from jsonschema->outlines==0.1.11->vllm) | |
vllm | Downloading rpds_py-0.22.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.2 kB) | |
vllm | Collecting distlib<1,>=0.3.7 (from virtualenv!=20.21.1,>=20.0.24->ray[default]>=2.9->vllm) | |
vllm | Downloading distlib-0.3.9-py2.py3-none-any.whl.metadata (5.2 kB) | |
vllm | Collecting platformdirs<5,>=3.9.1 (from virtualenv!=20.21.1,>=20.0.24->ray[default]>=2.9->vllm) | |
vllm | Downloading platformdirs-4.3.6-py3-none-any.whl.metadata (11 kB) | |
vllm | Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/site-packages (from jinja2->outlines==0.1.11->vllm) (2.1.5) | |
vllm | Collecting opencensus-context>=0.1.3 (from opencensus->ray[default]>=2.9->vllm) | |
vllm | Downloading opencensus_context-0.1.3-py2.py3-none-any.whl.metadata (3.3 kB) | |
vllm | Collecting six~=1.16 (from opencensus->ray[default]>=2.9->vllm) | |
vllm | Downloading six-1.17.0-py2.py3-none-any.whl.metadata (1.7 kB) | |
vllm | Collecting google-api-core<3.0.0,>=1.0.0 (from opencensus->ray[default]>=2.9->vllm) | |
vllm | Downloading google_api_core-2.24.1-py3-none-any.whl.metadata (3.0 kB) | |
vllm | Collecting iniconfig (from pytest->xgrammar>=0.1.6->vllm) | |
vllm | Downloading iniconfig-2.0.0-py3-none-any.whl.metadata (2.6 kB) | |
vllm | Collecting pluggy<2,>=1.5 (from pytest->xgrammar>=0.1.6->vllm) | |
vllm | Downloading pluggy-1.5.0-py3-none-any.whl.metadata (4.8 kB) | |
vllm | Collecting wrapt (from smart-open->ray[default]>=2.9->vllm) | |
vllm | Downloading wrapt-1.17.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.4 kB) | |
vllm | Collecting googleapis-common-protos<2.0.dev0,>=1.56.2 (from google-api-core<3.0.0,>=1.0.0->opencensus->ray[default]>=2.9->vllm) | |
vllm | Downloading googleapis_common_protos-1.67.0rc1-py2.py3-none-any.whl.metadata (5.1 kB) | |
vllm | Collecting proto-plus<2.0.0dev,>=1.22.3 (from google-api-core<3.0.0,>=1.0.0->opencensus->ray[default]>=2.9->vllm) | |
vllm | Downloading proto_plus-1.26.0-py3-none-any.whl.metadata (2.2 kB) | |
vllm | Collecting google-auth<3.0.dev0,>=2.14.1 (from google-api-core<3.0.0,>=1.0.0->opencensus->ray[default]>=2.9->vllm) | |
vllm | Downloading google_auth-2.38.0-py2.py3-none-any.whl.metadata (4.8 kB) | |
vllm | Collecting cachetools<6.0,>=2.0.0 (from google-auth<3.0.dev0,>=2.14.1->google-api-core<3.0.0,>=1.0.0->opencensus->ray[default]>=2.9->vllm) | |
vllm | Downloading cachetools-5.5.1-py3-none-any.whl.metadata (5.4 kB) | |
vllm | Collecting pyasn1-modules>=0.2.1 (from google-auth<3.0.dev0,>=2.14.1->google-api-core<3.0.0,>=1.0.0->opencensus->ray[default]>=2.9->vllm) | |
vllm | Downloading pyasn1_modules-0.4.1-py3-none-any.whl.metadata (3.5 kB) | |
vllm | Collecting rsa<5,>=3.1.4 (from google-auth<3.0.dev0,>=2.14.1->google-api-core<3.0.0,>=1.0.0->opencensus->ray[default]>=2.9->vllm) | |
vllm | Downloading rsa-4.9-py3-none-any.whl.metadata (4.2 kB) | |
vllm | Collecting pyasn1<0.7.0,>=0.4.6 (from pyasn1-modules>=0.2.1->google-auth<3.0.dev0,>=2.14.1->google-api-core<3.0.0,>=1.0.0->opencensus->ray[default]>=2.9->vllm) | |
vllm | Downloading pyasn1-0.6.1-py3-none-any.whl.metadata (8.4 kB) | |
vllm | Downloading vllm-0.7.2-cp38-abi3-manylinux1_x86_64.whl (264.3 MB) | |
vllm | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 264.3/264.3 MB 117.9 MB/s eta 0:00:00 | |
vllm | Downloading compressed_tensors-0.9.1-py3-none-any.whl (96 kB) | |
vllm | Downloading depyf-0.18.0-py3-none-any.whl (38 kB) | |
vllm | Downloading gguf-0.10.0-py3-none-any.whl (71 kB) | |
vllm | Downloading lark-1.2.2-py3-none-any.whl (111 kB) | |
vllm | Downloading outlines-0.1.11-py3-none-any.whl (87 kB) | |
vllm | Downloading torch-2.5.1-cp311-cp311-manylinux1_x86_64.whl (906.5 MB) | |
vllm | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 906.5/906.5 MB 84.1 MB/s eta 0:00:00 | |
vllm | Downloading torchaudio-2.5.1-cp311-cp311-manylinux1_x86_64.whl (3.4 MB) | |
vllm | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 3.4/3.4 MB 100.7 MB/s eta 0:00:00 | |
vllm | Downloading torchvision-0.20.1-cp311-cp311-manylinux1_x86_64.whl (7.2 MB) | |
vllm | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 7.2/7.2 MB 106.4 MB/s eta 0:00:00 | |
vllm | Downloading xformers-0.0.28.post3-cp311-cp311-manylinux_2_28_x86_64.whl (16.7 MB) | |
vllm | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 16.7/16.7 MB 111.7 MB/s eta 0:00:00 | |
vllm | Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl (363.4 MB) | |
vllm | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 363.4/363.4 MB 112.5 MB/s eta 0:00:00 | |
vllm | Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (13.8 MB) | |
vllm | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 13.8/13.8 MB 112.8 MB/s eta 0:00:00 | |
vllm | Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (24.6 MB) | |
vllm | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 24.6/24.6 MB 108.9 MB/s eta 0:00:00 | |
vllm | Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (883 kB) | |
vllm | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 883.7/883.7 kB 95.8 MB/s eta 0:00:00 | |
vllm | Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl (664.8 MB) | |
vllm | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 664.8/664.8 MB 94.6 MB/s eta 0:00:00 | |
vllm | Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl (211.5 MB) | |
vllm | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 211.5/211.5 MB 119.9 MB/s eta 0:00:00 | |
vllm | Downloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl (56.3 MB) | |
vllm | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 56.3/56.3 MB 117.4 MB/s eta 0:00:00 | |
vllm | Downloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl (127.9 MB) | |
vllm | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 127.9/127.9 MB 117.8 MB/s eta 0:00:00 | |
vllm | Downloading nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl (207.5 MB) | |
vllm | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 207.5/207.5 MB 119.6 MB/s eta 0:00:00 | |
vllm | Downloading nvidia_nccl_cu12-2.21.5-py3-none-manylinux2014_x86_64.whl (188.7 MB) | |
vllm | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 188.7/188.7 MB 120.8 MB/s eta 0:00:00 | |
vllm | Downloading nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (21.1 MB) | |
vllm | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 21.1/21.1 MB 115.7 MB/s eta 0:00:00 | |
vllm | Downloading nvidia_nvtx_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (99 kB) | |
vllm | Downloading outlines_core-0.1.26-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (343 kB) | |
vllm | Downloading triton-3.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (209.5 MB) | |
vllm | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 209.5/209.5 MB 120.5 MB/s eta 0:00:00 | |
vllm | Downloading fastapi-0.115.8-py3-none-any.whl (94 kB) | |
vllm | Downloading filelock-3.17.0-py3-none-any.whl (16 kB) | |
vllm | Downloading lm_format_enforcer-0.10.9-py3-none-any.whl (43 kB) | |
vllm | Downloading mistral_common-1.5.2-py3-none-any.whl (6.5 MB) | |
vllm | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 6.5/6.5 MB 96.7 MB/s eta 0:00:00 | |
vllm | Downloading sentencepiece-0.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB) | |
vllm | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.3/1.3 MB 87.9 MB/s eta 0:00:00 | |
vllm | Downloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.3 MB) | |
vllm | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 18.3/18.3 MB 109.9 MB/s eta 0:00:00 | |
vllm | Downloading nvidia_ml_py-12.570.86-py3-none-any.whl (44 kB) | |
vllm | Downloading openai-1.61.1-py3-none-any.whl (463 kB) | |
vllm | Downloading pillow-10.4.0-cp311-cp311-manylinux_2_28_x86_64.whl (4.5 MB) | |
vllm | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 4.5/4.5 MB 80.3 MB/s eta 0:00:00 | |
vllm | Downloading prometheus_client-0.21.1-py3-none-any.whl (54 kB) | |
vllm | Downloading prometheus_fastapi_instrumentator-7.0.2-py3-none-any.whl (18 kB) | |
vllm | Downloading pydantic-2.10.6-py3-none-any.whl (431 kB) | |
vllm | Downloading pydantic_core-2.27.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.0 MB) | |
vllm | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.0/2.0 MB 112.2 MB/s eta 0:00:00 | |
vllm | Downloading PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (762 kB) | |
vllm | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 763.0/763.0 kB 65.2 MB/s eta 0:00:00 | |
vllm | Downloading ray-2.42.0-cp311-cp311-manylinux2014_x86_64.whl (67.4 MB) | |
vllm | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 67.4/67.4 MB 119.5 MB/s eta 0:00:00 | |
vllm | Downloading protobuf-5.29.3-cp38-abi3-manylinux2014_x86_64.whl (319 kB) | |
vllm | Downloading aiohttp-3.11.12-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.7 MB) | |
vllm | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.7/1.7 MB 116.2 MB/s eta 0:00:00 | |
vllm | Downloading requests-2.32.3-py3-none-any.whl (64 kB) | |
vllm | Downloading tiktoken-0.7.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB) | |
vllm | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.1/1.1 MB 97.8 MB/s eta 0:00:00 | |
vllm | Downloading tokenizers-0.21.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.0 MB) | |
vllm | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 3.0/3.0 MB 125.7 MB/s eta 0:00:00 | |
vllm | Downloading tqdm-4.67.1-py3-none-any.whl (78 kB) | |
vllm | Downloading transformers-4.48.3-py3-none-any.whl (9.7 MB) | |
vllm | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 9.7/9.7 MB 111.1 MB/s eta 0:00:00 | |
vllm | Downloading xgrammar-0.1.11-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (396 kB) | |
vllm | Downloading blake3-1.0.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (376 kB) | |
vllm | Downloading cloudpickle-3.1.1-py3-none-any.whl (20 kB) | |
vllm | Downloading einops-0.8.0-py3-none-any.whl (43 kB) | |
vllm | Downloading importlib_metadata-8.6.1-py3-none-any.whl (26 kB) | |
vllm | Downloading msgspec-0.19.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (210 kB) | |
vllm | Downloading partial_json_parser-0.2.1.1.post5-py3-none-any.whl (10 kB) | |
vllm | Downloading psutil-6.1.1-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (287 kB) | |
vllm | Downloading py_cpuinfo-9.0.0-py3-none-any.whl (22 kB) | |
vllm | Downloading pyzmq-26.2.1-cp311-cp311-manylinux_2_28_x86_64.whl (874 kB) | |
vllm | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 874.5/874.5 kB 55.2 MB/s eta 0:00:00 | |
vllm | Downloading aiohappyeyeballs-2.4.6-py3-none-any.whl (14 kB) | |
vllm | Downloading aiosignal-1.3.2-py2.py3-none-any.whl (7.6 kB) | |
vllm | Downloading annotated_types-0.7.0-py3-none-any.whl (13 kB) | |
vllm | Downloading anyio-4.8.0-py3-none-any.whl (96 kB) | |
vllm | Downloading attrs-25.1.0-py3-none-any.whl (63 kB) | |
vllm | Downloading certifi-2025.1.31-py3-none-any.whl (166 kB) | |
vllm | Downloading charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (143 kB) | |
vllm | Downloading click-8.1.8-py3-none-any.whl (98 kB) | |
vllm | Downloading distro-1.9.0-py3-none-any.whl (20 kB) | |
vllm | Downloading frozenlist-1.5.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (274 kB) | |
vllm | Downloading grpcio-1.70.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.9 MB) | |
vllm | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 5.9/5.9 MB 119.2 MB/s eta 0:00:00 | |
vllm | Downloading h11-0.14.0-py3-none-any.whl (58 kB) | |
vllm | Downloading httptools-0.6.4-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (459 kB) | |
vllm | Downloading httpx-0.28.1-py3-none-any.whl (73 kB) | |
vllm | Downloading httpcore-1.0.7-py3-none-any.whl (78 kB) | |
vllm | Downloading huggingface_hub-0.28.1-py3-none-any.whl (464 kB) | |
vllm | Downloading idna-3.10-py3-none-any.whl (70 kB) | |
vllm | Downloading interegular-0.3.3-py37-none-any.whl (23 kB) | |
vllm | Downloading jiter-0.8.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (345 kB) | |
vllm | Downloading jsonschema-4.23.0-py3-none-any.whl (88 kB) | |
vllm | Downloading msgpack-1.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (403 kB) | |
vllm | Downloading multidict-6.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (129 kB) | |
vllm | Downloading opencv_python_headless-4.11.0.86-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (50.0 MB) | |
vllm | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 50.0/50.0 MB 113.2 MB/s eta 0:00:00 | |
vllm | Downloading packaging-24.2-py3-none-any.whl (65 kB) | |
vllm | Downloading propcache-0.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (231 kB) | |
vllm | Downloading py_spy-0.4.0-py2.py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.whl (2.7 MB) | |
vllm | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.7/2.7 MB 87.7 MB/s eta 0:00:00 | |
vllm | Downloading python_dotenv-1.0.1-py3-none-any.whl (19 kB) | |
vllm | Downloading referencing-0.36.2-py3-none-any.whl (26 kB) | |
vllm | Downloading regex-2024.11.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (792 kB) | |
vllm | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 792.7/792.7 kB 50.3 MB/s eta 0:00:00 | |
vllm | Downloading safetensors-0.5.2-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (461 kB) | |
vllm | Downloading sniffio-1.3.1-py3-none-any.whl (10 kB) | |
vllm | Downloading starlette-0.45.3-py3-none-any.whl (71 kB) | |
vllm | Downloading urllib3-2.3.0-py3-none-any.whl (128 kB) | |
vllm | Downloading uvloop-0.21.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.0 MB) | |
vllm | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 4.0/4.0 MB 89.9 MB/s eta 0:00:00 | |
vllm | Downloading virtualenv-20.29.1-py3-none-any.whl (4.3 MB) | |
vllm | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 4.3/4.3 MB 102.4 MB/s eta 0:00:00 | |
vllm | Downloading watchfiles-1.0.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (452 kB) | |
vllm | Downloading websockets-14.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (169 kB) | |
vllm | Downloading yarl-1.18.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (344 kB) | |
vllm | Downloading zipp-3.21.0-py3-none-any.whl (9.6 kB) | |
vllm | Downloading aiohttp_cors-0.7.0-py3-none-any.whl (27 kB) | |
vllm | Downloading airportsdata-20241001-py3-none-any.whl (912 kB) | |
vllm | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 912.7/912.7 kB 86.6 MB/s eta 0:00:00 | |
vllm | Downloading astor-0.8.1-py2.py3-none-any.whl (27 kB) | |
vllm | Downloading colorful-0.5.6-py2.py3-none-any.whl (201 kB) | |
vllm | Downloading dill-0.3.9-py3-none-any.whl (119 kB) | |
vllm | Downloading diskcache-5.6.3-py3-none-any.whl (45 kB) | |
vllm | Downloading nest_asyncio-1.6.0-py3-none-any.whl (5.2 kB) | |
vllm | Downloading opencensus-0.11.4-py2.py3-none-any.whl (128 kB) | |
vllm | Downloading pybind11-2.13.6-py3-none-any.whl (243 kB) | |
vllm | Downloading pycountry-24.6.1-py3-none-any.whl (6.3 MB) | |
vllm | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 6.3/6.3 MB 106.0 MB/s eta 0:00:00 | |
vllm | Downloading pytest-8.3.4-py3-none-any.whl (343 kB) | |
vllm | Downloading smart_open-7.1.0-py3-none-any.whl (61 kB) | |
vllm | Downloading uvicorn-0.34.0-py3-none-any.whl (62 kB) | |
vllm | Downloading distlib-0.3.9-py2.py3-none-any.whl (468 kB) | |
vllm | Downloading google_api_core-2.24.1-py3-none-any.whl (160 kB) | |
vllm | Downloading jsonschema_specifications-2024.10.1-py3-none-any.whl (18 kB) | |
vllm | Downloading opencensus_context-0.1.3-py2.py3-none-any.whl (5.1 kB) | |
vllm | Downloading platformdirs-4.3.6-py3-none-any.whl (18 kB) | |
vllm | Downloading pluggy-1.5.0-py3-none-any.whl (20 kB) | |
vllm | Downloading rpds_py-0.22.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (381 kB) | |
vllm | Downloading six-1.17.0-py2.py3-none-any.whl (11 kB) | |
vllm | Downloading iniconfig-2.0.0-py3-none-any.whl (5.9 kB) | |
vllm | Downloading wrapt-1.17.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (83 kB) | |
vllm | Downloading google_auth-2.38.0-py2.py3-none-any.whl (210 kB) | |
vllm | Downloading googleapis_common_protos-1.67.0rc1-py2.py3-none-any.whl (165 kB) | |
vllm | Downloading proto_plus-1.26.0-py3-none-any.whl (50 kB) | |
vllm | Downloading cachetools-5.5.1-py3-none-any.whl (9.5 kB) | |
vllm | Downloading pyasn1_modules-0.4.1-py3-none-any.whl (181 kB) | |
vllm | Downloading rsa-4.9-py3-none-any.whl (34 kB) | |
vllm | Downloading pyasn1-0.6.1-py3-none-any.whl (83 kB) | |
vllm | Installing collected packages: sentencepiece, py-spy, py-cpuinfo, opencensus-context, nvidia-ml-py, distlib, colorful, blake3, zipp, wrapt, websockets, uvloop, urllib3, tqdm, sniffio, six, safetensors, rpds-py, regex, pyzmq, pyyaml, python-dotenv, pydantic-core, pycountry, pybind11, pyasn1, psutil, protobuf, propcache, prometheus_client, pluggy, platformdirs, pillow, partial-json-parser, packaging, nvidia-nvtx-cu12, nvidia-nvjitlink-cu12, nvidia-nccl-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, numpy, nest_asyncio, multidict, msgspec, msgpack, lark, jiter, interegular, iniconfig, idna, httptools, h11, grpcio, frozenlist, filelock, einops, distro, diskcache, dill, cloudpickle, click, charset-normalizer, certifi, cachetools, attrs, astor, annotated-types, airportsdata, aiohappyeyeballs, yarl, virtualenv, uvicorn, triton, smart-open, rsa, requests, referencing, pytest, pydantic, pyasn1-modules, proto-plus, opencv-python-headless, nvidia-cusparse-cu12, nvidia-cudnn-cu12, importlib_metadata, httpcore, googleapis-common-protos, gguf, depyf, anyio, aiosignal, watchfiles, tiktoken, starlette, nvidia-cusolver-cu12, lm-format-enforcer, jsonschema-specifications, huggingface-hub, httpx, google-auth, aiohttp, torch, tokenizers, prometheus-fastapi-instrumentator, openai, jsonschema, google-api-core, fastapi, aiohttp-cors, xformers, transformers, torchvision, torchaudio, ray, outlines_core, opencensus, mistral_common, xgrammar, outlines, compressed-tensors, vllm | |
vllm | Attempting uninstall: filelock | |
vllm | Found existing installation: filelock 3.13.1 | |
vllm | Uninstalling filelock-3.13.1: | |
vllm | Successfully uninstalled filelock-3.13.1 | |
vllm | Attempting uninstall: torch | |
vllm | Found existing installation: torch 2.6.0+cpu | |
vllm | Uninstalling torch-2.6.0+cpu: | |
vllm | Successfully uninstalled torch-2.6.0+cpu | |
vllm | Successfully installed aiohappyeyeballs-2.4.6 aiohttp-3.11.12 aiohttp-cors-0.7.0 aiosignal-1.3.2 airportsdata-20241001 annotated-types-0.7.0 anyio-4.8.0 astor-0.8.1 attrs-25.1.0 blake3-1.0.4 cachetools-5.5.1 certifi-2025.1.31 charset-normalizer-3.4.1 click-8.1.8 cloudpickle-3.1.1 colorful-0.5.6 compressed-tensors-0.9.1 depyf-0.18.0 dill-0.3.9 diskcache-5.6.3 distlib-0.3.9 distro-1.9.0 einops-0.8.0 fastapi-0.115.8 filelock-3.17.0 frozenlist-1.5.0 gguf-0.10.0 google-api-core-2.24.1 google-auth-2.38.0 googleapis-common-protos-1.67.0rc1 grpcio-1.70.0 h11-0.14.0 httpcore-1.0.7 httptools-0.6.4 httpx-0.28.1 huggingface-hub-0.28.1 idna-3.10 importlib_metadata-8.6.1 iniconfig-2.0.0 interegular-0.3.3 jiter-0.8.2 jsonschema-4.23.0 jsonschema-specifications-2024.10.1 lark-1.2.2 lm-format-enforcer-0.10.9 mistral_common-1.5.2 msgpack-1.1.0 msgspec-0.19.0 multidict-6.1.0 nest_asyncio-1.6.0 numpy-1.26.4 nvidia-cublas-cu12-12.4.5.8 nvidia-cuda-cupti-cu12-12.4.127 nvidia-cuda-nvrtc-cu12-12.4.127 nvidia-cuda-runtime-cu12-12.4.127 nvidia-cudnn-cu12-9.1.0.70 nvidia-cufft-cu12-11.2.1.3 nvidia-curand-cu12-10.3.5.147 nvidia-cusolver-cu12-11.6.1.9 nvidia-cusparse-cu12-12.3.1.170 nvidia-ml-py-12.570.86 nvidia-nccl-cu12-2.21.5 nvidia-nvjitlink-cu12-12.4.127 nvidia-nvtx-cu12-12.4.127 openai-1.61.1 opencensus-0.11.4 opencensus-context-0.1.3 opencv-python-headless-4.11.0.86 outlines-0.1.11 outlines_core-0.1.26 packaging-24.2 partial-json-parser-0.2.1.1.post5 pillow-10.4.0 platformdirs-4.3.6 pluggy-1.5.0 prometheus-fastapi-instrumentator-7.0.2 prometheus_client-0.21.1 propcache-0.2.1 proto-plus-1.26.0 protobuf-5.29.3 psutil-6.1.1 py-cpuinfo-9.0.0 py-spy-0.4.0 pyasn1-0.6.1 pyasn1-modules-0.4.1 pybind11-2.13.6 pycountry-24.6.1 pydantic-2.10.6 pydantic-core-2.27.2 pytest-8.3.4 python-dotenv-1.0.1 pyyaml-6.0.2 pyzmq-26.2.1 ray-2.42.0 referencing-0.36.2 regex-2024.11.6 requests-2.32.3 rpds-py-0.22.3 rsa-4.9 safetensors-0.5.2 sentencepiece-0.2.0 six-1.17.0 smart-open-7.1.0 sniffio-1.3.1 starlette-0.45.3 tiktoken-0.7.0 tokenizers-0.21.0 torch-2.5.1 torchaudio-2.5.1 torchvision-0.20.1 tqdm-4.67.1 transformers-4.48.3 triton-3.1.0 urllib3-2.3.0 uvicorn-0.34.0 uvloop-0.21.0 virtualenv-20.29.1 vllm-0.7.2 watchfiles-1.0.4 websockets-14.2 wrapt-1.17.2 xformers-0.0.28.post3 xgrammar-0.1.11 yarl-1.18.3 zipp-3.21.0 | |
vllm | WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager, possibly rendering your system unusable. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv. Use the --root-user-action option if you know what you are doing and want to suppress this warning. | |
vllm | INFO 02-09 00:44:11 __init__.py:194] No platform detected, vLLM is running on UnspecifiedPlatform | |
vllm | INFO 02-09 00:44:12 api_server.py:120] vLLM API server version 0.7.2 | |
vllm | INFO 02-09 00:44:12 api_server.py:121] args: Namespace(host='0.0.0.0', port=8000, ssl_keyfile=None, ssl_certfile=None, ssl_ca_certs=None, ssl_cert_reqs=0, root_path=None, log_level='debug', model='deepseek-ai/DeepSeek-V3', task='auto', tokenizer=None, skip_tokenizer_init=False, revision=None, code_revision=None, tokenizer_revision=None, tokenizer_mode='auto', trust_remote_code=False, allowed_local_media_path=None, download_dir=None, load_format='auto', config_format=<ConfigFormat.AUTO: 'auto'>, dtype='auto', kv_cache_dtype='auto', max_model_len=None, guided_decoding_backend='xgrammar', logits_processor_pattern=None, model_impl='auto', distributed_executor_backend=None, pipeline_parallel_size=1, tensor_parallel_size=1, max_parallel_loading_workers=None, ray_workers_use_nsight=False, block_size=None, enable_prefix_caching=None, disable_sliding_window=False, use_v2_block_manager=True, num_lookahead_slots=0, seed=0, swap_space=4, cpu_offload_gb=0, gpu_memory_utilization=0.9, num_gpu_blocks_override=None, max_num_batched_tokens=None, max_num_seqs=None, max_logprobs=20, disable_log_stats=False, quantization=None, rope_scaling=None, rope_theta=None, hf_overrides=None, enforce_eager=False, max_seq_len_to_capture=8192, disable_custom_all_reduce=False, tokenizer_pool_size=0, tokenizer_pool_type='ray', tokenizer_pool_extra_config=None, limit_mm_per_prompt=None, mm_processor_kwargs=None, disable_mm_preprocessor_cache=False, enable_lora=False, enable_lora_bias=False, max_loras=1, max_lora_rank=16, lora_extra_vocab_size=256, lora_dtype='auto', long_lora_scaling_factors=None, max_cpu_loras=None, fully_sharded_loras=False, enable_prompt_adapter=False, max_prompt_adapters=1, max_prompt_adapter_token=0, device='auto', num_scheduler_steps=1, multi_step_stream_outputs=True, scheduler_delay_factor=0.0, enable_chunked_prefill=None, speculative_model=None, speculative_model_quantization=None, num_speculative_tokens=None, speculative_disable_mqa_scorer=False, speculative_draft_tensor_parallel_size=None, speculative_max_model_len=None, speculative_disable_by_batch_size=None, ngram_prompt_lookup_max=None, ngram_prompt_lookup_min=None, spec_decoding_acceptance_method='rejection_sampler', typical_acceptance_sampler_posterior_threshold=None, typical_acceptance_sampler_posterior_alpha=None, disable_logprobs_during_spec_decoding=None, model_loader_extra_config=None, ignore_patterns=[], preemption_mode=None, served_model_name=None, qlora_adapter_name_or_path=None, otlp_traces_endpoint=None, collect_detailed_traces=None, disable_async_output_proc=False, scheduling_policy='fcfs', override_neuron_config=None, override_pooler_config=None, compilation_config=None, kv_transfer_config=None, worker_cls='auto', generation_config=None, override_generation_config=None, enable_sleep_mode=False, calculate_kv_scales=False, disable_log_requests=False) | |
vllm | Traceback (most recent call last): | |
vllm | File "<frozen runpy>", line 198, in _run_module_as_main | |
vllm | File "<frozen runpy>", line 88, in _run_code | |
vllm | File "/usr/local/lib/python3.11/site-packages/vllm/entrypoints/api_server.py", line 169, in <module> | |
vllm | asyncio.run(run_server(args)) | |
vllm | File "/usr/local/lib/python3.11/asyncio/runners.py", line 190, in run | |
vllm | return runner.run(main) | |
vllm | ^^^^^^^^^^^^^^^^ | |
vllm | File "/usr/local/lib/python3.11/asyncio/runners.py", line 118, in run | |
vllm | return self._loop.run_until_complete(task) | |
vllm | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
vllm | File "/usr/local/lib/python3.11/asyncio/base_events.py", line 654, in run_until_complete | |
vllm | return future.result() | |
vllm | ^^^^^^^^^^^^^^^ | |
vllm | File "/usr/local/lib/python3.11/site-packages/vllm/entrypoints/api_server.py", line 125, in run_server | |
vllm | app = await init_app(args, llm_engine) | |
vllm | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
vllm | File "/usr/local/lib/python3.11/site-packages/vllm/entrypoints/api_server.py", line 111, in init_app | |
vllm | if llm_engine is not None else AsyncLLMEngine.from_engine_args( | |
vllm | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
vllm | File "/usr/local/lib/python3.11/site-packages/vllm/engine/async_llm_engine.py", line 639, in from_engine_args | |
vllm | engine_config = engine_args.create_engine_config(usage_context) | |
vllm | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
vllm | File "/usr/local/lib/python3.11/site-packages/vllm/engine/arg_utils.py", line 1074, in create_engine_config | |
vllm | device_config = DeviceConfig(device=self.device) | |
vllm | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
vllm | File "/usr/local/lib/python3.11/site-packages/vllm/config.py", line 1626, in __init__ | |
vllm | raise RuntimeError("Failed to infer device type") | |
vllm | RuntimeError: Failed to infer device type | |
vllm exited with code 1 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment