From ffb3d553cc9258049bf4d48214c9f4106cc67cfb Mon Sep 17 00:00:00 2001
From: Xinyu Chen
Date: Thu, 12 Feb 2026 01:12:13 +0800
Subject: [PATCH] [Model Runner V2] Init cuda graph pool when necessary (#33217)

Signed-off-by: Xinyu Chen
---
 vllm/v1/worker/gpu/cudagraph_utils.py             | 4 +++-
 vllm/v1/worker/gpu/spec_decode/eagle_cudagraph.py | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/vllm/v1/worker/gpu/cudagraph_utils.py b/vllm/v1/worker/gpu/cudagraph_utils.py
index bf55b99af..d5a22d6a0 100644
--- a/vllm/v1/worker/gpu/cudagraph_utils.py
+++ b/vllm/v1/worker/gpu/cudagraph_utils.py
@@ -45,7 +45,9 @@ class CudaGraphManager:
         )
 
         self.graphs: dict[int, torch.cuda.CUDAGraph] = {}
-        self.pool = torch.cuda.graph_pool_handle()
+        self.pool = None
+        if self.cudagraph_mode != CUDAGraphMode.NONE:
+            self.pool = torch.cuda.graph_pool_handle()
         self.hidden_states: torch.Tensor | None = None
 
     def needs_capture(self) -> bool:
diff --git a/vllm/v1/worker/gpu/spec_decode/eagle_cudagraph.py b/vllm/v1/worker/gpu/spec_decode/eagle_cudagraph.py
index 48e7cb110..1ea7ffcb5 100644
--- a/vllm/v1/worker/gpu/spec_decode/eagle_cudagraph.py
+++ b/vllm/v1/worker/gpu/spec_decode/eagle_cudagraph.py
@@ -44,7 +44,9 @@ class EagleCudaGraphManager:
         )
 
         self.graphs: dict[int, torch.cuda.CUDAGraph] = {}
-        self.pool = torch.cuda.graph_pool_handle()
+        self.pool = None
+        if self.cudagraph_mode != CUDAGraphMode.NONE:
+            self.pool = torch.cuda.graph_pool_handle()
 
     def get_cudagraph_size(self, num_tokens: int) -> int | None:
         return self.cudagraph_sizes.get(num_tokens)