[Model Runner V2] Add model_state inputs to CUDA graph capture (#36544)

Signed-off-by: Woosuk Kwon <woosuk@inferact.ai>
This commit is contained in:
Woosuk Kwon
2026-03-09 15:14:51 -07:00
committed by GitHub
parent 203a7f27da
commit 2a194ddd72

View File

@@ -320,6 +320,7 @@ class ModelCudaGraphManager(CudaGraphManager):
 model_inputs = {
     "input_ids": input_buffers.input_ids[:num_tokens],
     "positions": input_buffers.positions[:num_tokens],
+    **model_state.prepare_dummy_inputs(num_reqs, num_tokens),
 }
 model_output = model(**model_inputs)
 if self.use_aux_hidden_state_outputs: