[Model Runner V2] Add model_state inputs to CUDA graph capture (#36544)
Signed-off-by: Woosuk Kwon <woosuk@inferact.ai>
This commit is contained in:
@@ -320,6 +320,7 @@ class ModelCudaGraphManager(CudaGraphManager):
 model_inputs = {
     "input_ids": input_buffers.input_ids[:num_tokens],
     "positions": input_buffers.positions[:num_tokens],
+    **model_state.prepare_dummy_inputs(num_reqs, num_tokens),
 }
 model_output = model(**model_inputs)
 if self.use_aux_hidden_state_outputs:
Reference in New Issue
Block a user