full_logs.txt
Traceback (most recent call last):
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/TensorRT-LLM/examples/server/server.py", line 5, in <module>
    import tensorrt_llm
  File "/TensorRT-LLM/tensorrt_llm/__init__.py", line 47, in <module>
    from .hlapi.llm import LLM, ModelConfig
  File "/TensorRT-LLM/tensorrt_llm/hlapi/__init__.py", line 1, in <module>
    from .llm import LLM, ModelConfig
  File "/TensorRT-LLM/tensorrt_llm/hlapi/llm.py", line 15, in <module>
    import tensorrt_llm.bindings as tllm
ModuleNotFoundError: No module named 'tensorrt_llm.bindings'
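Note: this ModuleNotFoundError is the usual symptom of the source checkout shadowing the installed wheel. With /TensorRT-LLM as the working directory, `import tensorrt_llm` resolves to the local package directory, which has no compiled `bindings` extension. A minimal check (a sketch, assuming the wheel is installed under dist-packages as in the paths above):

    import importlib.util

    # Where does Python resolve tensorrt_llm from?
    spec = importlib.util.find_spec("tensorrt_llm")
    print(spec.origin)
    # /TensorRT-LLM/tensorrt_llm/__init__.py -> shadowed source tree, no bindings
    # /usr/local/lib/python3.10/dist-packages/tensorrt_llm/__init__.py -> installed wheel

Leaving the checkout (the `cd -` below) lets the installed copy win, which is why the second run gets past the import.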
root@4d0626aa08a5:/TensorRT-LLM# cd -
/TensorRT-LLM/examples/server
root@4d0626aa08a5:/TensorRT-LLM/examples/server# python3 server.py --model_dir ./tmp/llama/7B/trt_engines/bf16/1-gpu/ --tokenizer_type llama
[TensorRT-LLM] TensorRT-LLM version: 0.9.0.dev2024031200
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_errors.py", line 304, in hf_raise_for_status
    response.raise_for_status()
  File "/usr/local/lib/python3.10/dist-packages/requests/models.py", line 1021, in raise_for_status
    raise HTTPError(http_error_msg, response=self)
requests.exceptions.HTTPError: 401 Client Error: Unauthorized for url: https://huggingface.co/llama/resolve/main/tokenizer_config.json

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/transformers/utils/hub.py", line 398, in cached_file
    resolved_file = hf_hub_download(
  File "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_validators.py", line 118, in _inner_fn
    return fn(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py", line 1403, in hf_hub_download
    raise head_call_error
  File "/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py", line 1261, in hf_hub_download
    metadata = get_hf_file_metadata(
  File "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_validators.py", line 118, in _inner_fn
    return fn(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py", line 1667, in get_hf_file_metadata
    r = _request_wrapper(
  File "/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py", line 385, in _request_wrapper
    response = _request_wrapper(
  File "/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py", line 409, in _request_wrapper
    hf_raise_for_status(response)
  File "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_errors.py", line 352, in hf_raise_for_status
    raise RepositoryNotFoundError(message, response) from e
huggingface_hub.utils._errors.RepositoryNotFoundError: 401 Client Error. (Request ID: Root=1-65f2380b-0d072e195d50664c6d49f66d;712adfbe-9512-407d-be35-5c3ad4eda32b)
Repository Not Found for url: https://huggingface.co/llama/resolve/main/tokenizer_config.json.
Please make sure you specified the correct `repo_id` and `repo_type`.
If you are trying to access a private or gated repo, make sure you are authenticated.
Invalid username or password.

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/TensorRT-LLM/examples/server/server.py", line 80, in <module>
    asyncio.run(main(args))
  File "/usr/lib/python3.10/asyncio/runners.py", line 44, in run
    return loop.run_until_complete(main)
  File "/usr/lib/python3.10/asyncio/base_events.py", line 649, in run_until_complete
    return future.result()
  File "/TensorRT-LLM/examples/server/server.py", line 61, in main
    executor = GenerationExecutor(args.model_dir, args.tokenizer_type,
  File "/usr/local/lib/python3.10/dist-packages/tensorrt_llm/executor.py", line 188, in __init__
    self.tokenizer = AutoTokenizer.from_pretrained(
  File "/usr/local/lib/python3.10/dist-packages/transformers/models/auto/tokenization_auto.py", line 767, in from_pretrained
    tokenizer_config = get_tokenizer_config(pretrained_model_name_or_path, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/transformers/models/auto/tokenization_auto.py", line 600, in get_tokenizer_config
    resolved_config_file = cached_file(
  File "/usr/local/lib/python3.10/dist-packages/transformers/utils/hub.py", line 421, in cached_file
    raise EnvironmentError(
OSError: llama is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'
If this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=<your_token>`
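Note: the whole HTTP failure chain comes from the tokenizer lookup, not the engine. The traceback shows `GenerationExecutor` handing `args.tokenizer_type` ("llama") to `AutoTokenizer.from_pretrained`, so transformers treats the bare string as a Hugging Face Hub repo id and requests https://huggingface.co/llama/..., which does not exist. A sketch of the distinction (the local directory below is hypothetical):

    from transformers import AutoTokenizer

    # What the failing run effectively does: "llama" is resolved as a Hub repo id.
    # AutoTokenizer.from_pretrained("llama")  # -> RepositoryNotFoundError / 401

    # Point at a directory containing tokenizer_config.json instead,
    # or at a real repo id (gated Llama repos also need an access token).
    tok = AutoTokenizer.from_pretrained("/path/to/llama/hf_tokenizer")  # hypothetical local dir
    # tok = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf", token="hf_...")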
Exception ignored in: <function GenerationExecutor.__del__ at 0x7fb33eb55090>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/tensorrt_llm/executor.py", line 380, in __del__
  File "/usr/local/lib/python3.10/dist-packages/tensorrt_llm/executor.py", line 375, in __exit__
AttributeError: 'GenerationExecutor' object has no attribute 'engine'
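Note: this final AttributeError is secondary noise: `__init__` raised before `self.engine` was ever assigned, so the `__del__`/`__exit__` cleanup touches a missing attribute on a partially constructed object. A generic guard for that pattern (illustration only, not TensorRT-LLM's actual code):

    class Executor:
        def __init__(self, model_dir):
            if model_dir is None:
                raise ValueError("no model_dir")  # raises before self.engine exists
            self.engine = object()  # stand-in for the real engine handle

        def __del__(self):
            # getattr with a default tolerates a partially constructed object,
            # so teardown does not raise a second AttributeError.
            engine = getattr(self, "engine", None)
            if engine is not None:
                pass  # release the engine here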