[Model Runner V2] Add dummy profile_cudagraph_memory API (#36520)

Signed-off-by: Woosuk Kwon <woosuk@inferact.ai>
This commit is contained in:
Woosuk Kwon
2026-03-09 10:20:13 -07:00
committed by GitHub
parent 1e0f917b34
commit 6e956d9eca

View File

@@ -473,6 +473,10 @@ class GPUModelRunner(LoRAModelRunnerMixin):
# SP is not supported yet.
return num_scheduled_tokens
def profile_cudagraph_memory(self) -> int:
    """Dummy ``profile_cudagraph_memory`` API; always returns 0.

    This is a stub: it performs no work and does not read any state from
    ``self``.

    Returns:
        Always ``0``.
    """
    # NOTE(woosuk): It is TBD whether we keep this API or not.
    return 0
@torch.inference_mode()
def capture_model(self) -> int:
if not self.cudagraph_manager.needs_capture():