From 2be1a0f74b016a589c6392670b66a2c8413f1a6a Mon Sep 17 00:00:00 2001 From: Wentao Ye <44945378+yewentao256@users.noreply.github.com> Date: Thu, 19 Mar 2026 19:39:43 -0400 Subject: [PATCH] [Refactor] Remove dead code in pooling model (#37572) Signed-off-by: yewentao256 --- vllm/entrypoints/pooling/utils.py | 8 -------- vllm/v1/pool/metadata.py | 4 ---- 2 files changed, 12 deletions(-) diff --git a/vllm/entrypoints/pooling/utils.py b/vllm/entrypoints/pooling/utils.py index b209c7282..1af6b3508 100644 --- a/vllm/entrypoints/pooling/utils.py +++ b/vllm/entrypoints/pooling/utils.py @@ -60,14 +60,6 @@ def encode_pooling_output_float(output: PoolingRequestOutput) -> list[float]: return output.outputs.data.tolist() -def encode_pooling_output_binary( - output: PoolingRequestOutput, - embed_dtype: EmbedDType, - endianness: Endianness, -) -> bytes: - return tensor2binary(output.outputs.data, embed_dtype, endianness) - - def encode_pooling_output_base64( output: PoolingRequestOutput, embed_dtype: EmbedDType, diff --git a/vllm/v1/pool/metadata.py b/vllm/v1/pool/metadata.py index cb386decc..c9fafe142 100644 --- a/vllm/v1/pool/metadata.py +++ b/vllm/v1/pool/metadata.py @@ -14,7 +14,6 @@ pin_memory = is_pin_memory_available() @dataclass class PoolingCursor: - index: list[int] first_token_indices_gpu: torch.Tensor last_token_indices_gpu: torch.Tensor prompt_lens_cpu: torch.Tensor @@ -23,7 +22,6 @@ class PoolingCursor: def __getitem__(self, indices: slice): return PoolingCursor( - index=self.index[indices], first_token_indices_gpu=self.first_token_indices_gpu[indices], last_token_indices_gpu=self.last_token_indices_gpu[indices], prompt_lens_cpu=self.prompt_lens_cpu[indices], @@ -108,7 +106,6 @@ class PoolingMetadata: assert len(prompt_lens) == n_seq - index = list(range(n_seq)) num_scheduled_tokens_cpu = torch.from_numpy(num_scheduled_tokens_np) if query_start_loc_gpu is None: cumsum = torch.zeros( @@ -130,7 +127,6 @@ class PoolingMetadata: ) cumsum = query_start_loc_gpu self.pooling_cursor = PoolingCursor( - index=index, first_token_indices_gpu=cumsum[:n_seq], last_token_indices_gpu=cumsum[1:] - 1, prompt_lens_cpu=prompt_lens,