Fix bugs when token_classify and classify run concurrently (#36614)

Signed-off-by: augusto.yjh <augusto.yjh@antgroup.com>
This commit is contained in:
Augusto Yao
2026-03-11 11:16:34 +08:00
committed by GitHub
parent fe714dd507
commit b386bb3d7c

View File

@@ -47,10 +47,13 @@ class AllPool(TokenPoolingMethod):
pooling_metadata: PoolingMetadata,
) -> list[TokenPoolingMethodOutputItem]:
pooling_cursor = pooling_metadata.get_pooling_cursor()
hidden_states_all = hidden_states.split(
pooling_cursor.num_scheduled_tokens_cpu.tolist()
)
hidden_states_lst = [hidden_states_all[i] for i in pooling_cursor.index]
hidden_states_lst = [
hidden_states[first : last + 1]
for first, last in zip(
pooling_cursor.first_token_indices_gpu.tolist(),
pooling_cursor.last_token_indices_gpu.tolist(),
)
]
if not self.enable_chunked_prefill:
return hidden_states_lst