[Refactor] Remove dead code in pooling model (#37572)

Signed-off-by: yewentao256 <zhyanwentao@126.com>
This commit is contained in:
Wentao Ye
2026-03-19 19:39:43 -04:00
committed by GitHub
parent 4120a05ff1
commit 2be1a0f74b
2 changed files with 0 additions and 12 deletions

View File

@@ -60,14 +60,6 @@ def encode_pooling_output_float(output: PoolingRequestOutput) -> list[float]:
return output.outputs.data.tolist()
def encode_pooling_output_binary(
    output: PoolingRequestOutput,
    embed_dtype: EmbedDType,
    endianness: Endianness,
) -> bytes:
    """Encode the data of a pooling result in binary form.

    Thin wrapper that hands ``output.outputs.data`` to ``tensor2binary``
    together with the requested dtype and byte order.

    Args:
        output: Pooling request result whose ``outputs.data`` is encoded
            (presumably a tensor, given the helper's name -- confirm).
        embed_dtype: Forwarded unchanged to ``tensor2binary``.
        endianness: Forwarded unchanged to ``tensor2binary``.

    Returns:
        The value produced by ``tensor2binary`` (annotated as ``bytes``).
    """
    # NOTE(review): the surrounding commit is titled "Remove dead code in
    # pooling model"; this helper appears to be among the removed lines --
    # confirm it still has callers before relying on it.
    pooled_data = output.outputs.data
    return tensor2binary(pooled_data, embed_dtype, endianness)
def encode_pooling_output_base64(
output: PoolingRequestOutput,
embed_dtype: EmbedDType,

View File

@@ -14,7 +14,6 @@ pin_memory = is_pin_memory_available()
@dataclass
class PoolingCursor:
index: list[int]
first_token_indices_gpu: torch.Tensor
last_token_indices_gpu: torch.Tensor
prompt_lens_cpu: torch.Tensor
@@ -23,7 +22,6 @@ class PoolingCursor:
def __getitem__(self, indices: slice):
return PoolingCursor(
index=self.index[indices],
first_token_indices_gpu=self.first_token_indices_gpu[indices],
last_token_indices_gpu=self.last_token_indices_gpu[indices],
prompt_lens_cpu=self.prompt_lens_cpu[indices],
@@ -108,7 +106,6 @@ class PoolingMetadata:
assert len(prompt_lens) == n_seq
index = list(range(n_seq))
num_scheduled_tokens_cpu = torch.from_numpy(num_scheduled_tokens_np)
if query_start_loc_gpu is None:
cumsum = torch.zeros(
@@ -130,7 +127,6 @@ class PoolingMetadata:
)
cumsum = query_start_loc_gpu
self.pooling_cursor = PoolingCursor(
index=index,
first_token_indices_gpu=cumsum[:n_seq],
last_token_indices_gpu=cumsum[1:] - 1,
prompt_lens_cpu=prompt_lens,