[Misc] Update code for encoder-decoder models (#33900)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2026-02-06 11:38:39 +08:00
committed by GitHub
parent a32cb49b60
commit 035a6cb09a
2 changed files with 9 additions and 3 deletions

View File

@@ -1098,7 +1098,7 @@ class MultiModalEncDecInputs(MultiModalInputs):
Note: Even text-only encoder-decoder models are currently implemented
as multi-modal models for convenience.
-(Example: https://github.com/neuralmagic/bart-plugin)
+(Example: https://github.com/vllm-project/bart-plugin)
"""
encoder_prompt_token_ids: list[int]

View File

@@ -185,7 +185,13 @@ class Scheduler(SchedulerInterface):
# NOTE: Text-only encoder-decoder models are implemented as
# multi-modal models for convenience
-# Example: https://github.com/neuralmagic/bart-plugin
+# Example: https://github.com/vllm-project/bart-plugin
+if self.is_encoder_decoder:
+assert mm_budget and len(mm_budget.mm_max_toks_per_item) <= 1, (
+"Encoder-decoder models are expected to implement the "
+"multimodal interface with at most one modality."
+)
self.max_num_encoder_input_tokens = (
mm_budget.encoder_compute_budget if mm_budget else 0
)
@@ -200,7 +206,7 @@ class Scheduler(SchedulerInterface):
# TODO (NickLucche): Generalize to models with variable-length encoder inputs.
self._num_encoder_max_input_tokens = (
mm_budget.mm_max_toks_per_item[mm_budget.get_modality_with_max_tokens()]
-if mm_budget
+if mm_budget and mm_budget.mm_max_toks_per_item
else 0
)