[Core] Performance: Use list[np.ndarray] instead of list[list[int]] for output tokens for GC optimization (#26368)

Signed-off-by: Jialin Ouyang <Jialin.Ouyang@gmail.com>
This commit is contained in:
Jialin Ouyang
2025-11-14 16:04:04 -08:00
committed by GitHub
parent 58e61e56b7
commit 186352b270
12 changed files with 102 additions and 76 deletions

View File

@@ -1010,8 +1010,8 @@ class Scheduler(SchedulerInterface):
continue
req_index = model_runner_output.req_id_to_index[req_id]
-generated_token_ids = (
-sampled_token_ids[req_index] if sampled_token_ids else []
+generated_token_ids: list[int] = (
+sampled_token_ids[req_index].tolist() if sampled_token_ids else []
)
scheduled_spec_token_ids = (