Skip to content

Instantly share code, notes, and snippets.

View frank-wei's full-sized avatar

Wei Wei frank-wei

  • Meta
  • Menlo Park, CA
View GitHub Profile
import tensorrt as trt
import torch
# https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#python_topics
"""
TensorRT Initialization
"""
TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE)
@frank-wei
frank-wei / gist:0dd5c4d90f1bec3c3ce016230e5e66bc
Created September 9, 2025 15:06
reduce the weight loading time #24154
0
True
True
True
1
True
True
True
2
True
from openai import OpenAI
# Connect to a locally running vLLM server via its OpenAI-compatible API
# (vLLM's default listen port is 8000).
client = OpenAI(
base_url="http://localhost:8000/v1", # adjust the port if your server uses a different one
api_key="EMPTY" # vLLM does not require authentication; any placeholder key works
)
# Model identifier sent with each request — it must exactly match the model
# path/name passed to `vllm serve`, or the server will reject the request.
MODEL_NAME = "/home/wwei6/local/checkpoints/gpt-oss-120b" # or "openai/gpt-oss-120b"
@frank-wei
frank-wei / gist1.txt
Last active October 10, 2025 23:34
without a customized structural tag
P1977951167 - copy
FBID: 2225683307937911
(An Untitled Masterwork)
Visible to All Users
Author
wwei6
Created
Sat Oct 4, 2025 10:29pm
Forks
@frank-wei
frank-wei / gist2.txt
Created October 10, 2025 23:33
with structural tag
P1977390868 - copy
FBID: 751895307897003
(An Untitled Masterwork)
Visible to All Users
Author
wwei6
Created
Sat Oct 4, 2025 10:39am
Forks
{
"type": "structural_tag",
"format": {
"type":
"triggered_tags",
"stop_after_first":
False,
"tags": [{
"begin": "container.exec <|constrain|>json",
"content": {
Server:
vllm serve /data/local/model/Qwen2.5-3B-Instruct/ --port 8081
Client:
```
from openai import OpenAI
import json
client = OpenAI(
base_url="http://localhost:8081/v1",
api_key="-",