[Refactor] Clean up pooling serial utils (#33665)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
2026-02-03 18:29:18 +08:00
parent dad2d6a590
commit 83449a5ff0
9 changed files with 417 additions and 332 deletions
--- a/examples/pooling/embed/embedding_requests_bytes_online.py
+++ b/examples/pooling/embed/embedding_requests_bytes_online.py
@@ -12,13 +12,12 @@ import json
 import requests
 import torch

-from vllm.utils.serial_utils import (
-    EMBED_DTYPE_TO_TORCH_DTYPE,
-    ENDIANNESS,
+from vllm.entrypoints.pooling.utils import (
    MetadataItem,
    build_metadata_items,
    decode_pooling_output,
 )
+from vllm.utils.serial_utils import EMBED_DTYPES, ENDIANNESS


 def post_http_request(prompt: dict, api_url: str) -> requests.Response:
@@ -51,7 +50,7 @@ def main(args):

    # The OpenAI client does not support the bytes encoding_format.
    # The OpenAI client does not support the embed_dtype and endianness parameters.
-    for embed_dtype in EMBED_DTYPE_TO_TORCH_DTYPE:
+    for embed_dtype in EMBED_DTYPES:
        for endianness in ENDIANNESS:
            prompt = {
                "model": model,
@@ -74,7 +73,7 @@ def main(args):
    # The vllm server always sorts the returned embeddings in the order of input. So
    # returning metadata is not necessary. You can set encoding_format to bytes_only
    # to let the server not return metadata.
-    for embed_dtype in EMBED_DTYPE_TO_TORCH_DTYPE:
+    for embed_dtype in EMBED_DTYPES:
        for endianness in ENDIANNESS:
            prompt = {
                "model": model,