[Refactor] Clean up pooling serial utils (#33665)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2026-02-03 18:29:18 +08:00
committed by GitHub
parent dad2d6a590
commit 83449a5ff0
9 changed files with 417 additions and 332 deletions

View File

@@ -12,11 +12,7 @@ import base64
import requests
import torch
-from vllm.utils.serial_utils import (
-EMBED_DTYPE_TO_TORCH_DTYPE,
-ENDIANNESS,
-binary2tensor,
-)
+from vllm.utils.serial_utils import EMBED_DTYPES, ENDIANNESS, binary2tensor
def post_http_request(prompt: dict, api_url: str) -> requests.Response:
@@ -45,7 +41,7 @@ def main(args):
] * 2
# The OpenAI client does not support the embed_dtype and endianness parameters.
-for embed_dtype in EMBED_DTYPE_TO_TORCH_DTYPE:
+for embed_dtype in EMBED_DTYPES:
for endianness in ENDIANNESS:
prompt = {
"model": model,