[Core] Performance: Use list[np.ndarray] instead of list[list[int]] for output tokens for GC optimization (#26368)

Signed-off-by: Jialin Ouyang <Jialin.Ouyang@gmail.com>
This commit is contained in:
Jialin Ouyang
2025-11-14 16:04:04 -08:00
committed by GitHub
parent 58e61e56b7
commit 186352b270
12 changed files with 102 additions and 76 deletions

View File

@@ -2,6 +2,7 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from collections import deque
+import numpy as np
import pytest
from vllm.v1.core.sched.output import SchedulerOutput
@@ -21,7 +22,7 @@ def _make_model_runner_output(
return ModelRunnerOutput(
req_ids=req_ids,
req_id_to_index={req_id: i for i, req_id in enumerate(req_ids)},
-sampled_token_ids=[[i] for i in range(len(req_ids))],
+sampled_token_ids=[np.array([i]) for i in range(len(req_ids))],
logprobs=None,
prompt_logprobs_dict={},
pooler_output=[],