[Core] Performance: Use list[np.ndarray] instead of list[list[int]] for output tokens for GC optimization (#26368)

Signed-off-by: Jialin Ouyang <Jialin.Ouyang@gmail.com>
This commit is contained in:
Jialin Ouyang
2025-11-14 16:04:04 -08:00
committed by GitHub
parent 58e61e56b7
commit 186352b270
12 changed files with 102 additions and 76 deletions

View File

@@ -3,6 +3,7 @@
from unittest import mock
import numpy as np
import pytest
import torch
@@ -112,7 +113,9 @@ def test_prepare_next_token_ids():
sampled_token_ids_tensor = torch.tensor(
sampled_token_ids, dtype=torch.int32, device=device
)
sampled_token_ids_cpu = [[i for i in seq if i != -1] for seq in sampled_token_ids]
sampled_token_ids_cpu = [
np.array([i for i in seq if i != -1]) for seq in sampled_token_ids
]
expected_next_token_ids_cpu = [1, 4, 30, 40]
expected_next_token_ids_tensor = torch.tensor(