[Core] Performance: Use list[np.ndarray] instead of list[list[int]] for output tokens for GC optimization (#26368)

Signed-off-by: Jialin Ouyang <Jialin.Ouyang@gmail.com>
This commit is contained in:
Jialin Ouyang
2025-11-14 16:04:04 -08:00
committed by GitHub
parent 58e61e56b7
commit 186352b270
12 changed files with 102 additions and 76 deletions

View File

@@ -7,6 +7,7 @@ from dataclasses import dataclass
from itertools import chain, count
from typing import Any
+import numpy as np
import torch
from vllm import SamplingParams
@@ -228,7 +229,7 @@ def create_model_runner_output(
# Make sampled tokens.
sampled_token = EOS_TOKEN_ID if use_eos else token_id
-sampled_token_ids = [[sampled_token] for _ in req_ids]
+sampled_token_ids = [np.array([sampled_token]) for _ in req_ids]
kv_connector_output = (
None