[Model Runner V2] Remove unnecessary copies in PW CUDA graph capture (#34849)

Signed-off-by: Woosuk Kwon <woosuk@inferact.ai>
This commit is contained in:
Woosuk Kwon
2026-02-18 15:52:50 -08:00
committed by GitHub
parent 2b84ac669c
commit c878b43b64

View File

@@ -218,13 +218,11 @@ class CudaGraphManager:
 batch_descriptor=batch_descriptor,
 slot_mapping=slot_mappings,
 ):
-hidden_states = model(
+model(
 input_ids=input_ids,
 positions=positions,
 inputs_embeds=inputs_embeds,
 )
-assert self.hidden_states is not None
-self.hidden_states[:num_tokens] = hidden_states
 
 @torch.inference_mode()
 def capture(