Truncation control for embedding models (#14776)

Signed-off-by: Gabriel Marinho <gmarinho@ibm.com>
Signed-off-by: Max de Bayser <mbayser@br.ibm.com>
Co-authored-by: Max de Bayser <mbayser@br.ibm.com>
2025-04-29 22:24:57 -03:00
parent 4055130a85
commit 1c2bc7ead0
21 changed files with 333 additions and 71 deletions
--- a/vllm/engine/protocol.py
+++ b/vllm/engine/protocol.py
@@ -2,7 +2,7 @@

 import asyncio
 from abc import ABC, abstractmethod
-from typing import AsyncGenerator, List, Mapping, Optional
+from typing import AsyncGenerator, Mapping, Optional

 from vllm.beam_search import BeamSearchSequence, create_sort_beams_key_function
 from vllm.config import DecodingConfig, ModelConfig, VllmConfig
@@ -256,7 +256,7 @@ class EngineClient(ABC):
    async def do_log_stats(
        self,
        scheduler_outputs: Optional[SchedulerOutputs] = None,
-        model_output: Optional[List[SamplerOutput]] = None,
+        model_output: Optional[list[SamplerOutput]] = None,
    ) -> None:
        ...