Truncation control for embedding models (#14776)

Signed-off-by: Gabriel Marinho <gmarinho@ibm.com>
Signed-off-by: Max de Bayser <mbayser@br.ibm.com>
Co-authored-by: Max de Bayser <mbayser@br.ibm.com>
This commit is contained in:
Gabriel Marinho
2025-04-29 22:24:57 -03:00
committed by GitHub
parent 4055130a85
commit 1c2bc7ead0
21 changed files with 333 additions and 71 deletions

View File

@@ -2,7 +2,7 @@
import asyncio
from abc import ABC, abstractmethod
from typing import AsyncGenerator, List, Mapping, Optional
from typing import AsyncGenerator, Mapping, Optional
from vllm.beam_search import BeamSearchSequence, create_sort_beams_key_function
from vllm.config import DecodingConfig, ModelConfig, VllmConfig
@@ -256,7 +256,7 @@ class EngineClient(ABC):
async def do_log_stats(
self,
scheduler_outputs: Optional[SchedulerOutputs] = None,
model_output: Optional[List[SamplerOutput]] = None,
model_output: Optional[list[SamplerOutput]] = None,
) -> None:
...