[Model] Use explicit types in get_generation_prompt (#33551)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Author: Cyrus Leung
Date: 2026-02-02 20:38:49 +08:00
Committed by: GitHub
Parent: b398e5c819
Commit: b10d05b8a8
8 changed files with 82 additions and 66 deletions

vllm/model_executor/models/granite_speech.py

@@ -26,7 +26,7 @@
 import math
 from collections.abc import Iterable, Mapping
-from typing import Annotated, Literal, cast
+from typing import Annotated, Literal

 import numpy as np
 import torch
@@ -36,7 +36,7 @@ from transformers import BatchFeature, PretrainedConfig
 from vllm.config import CacheConfig, ModelConfig, SpeechToTextConfig, VllmConfig
 from vllm.config.multimodal import BaseDummyOptions
-from vllm.inputs.data import PromptType
+from vllm.inputs.data import PromptType, TokensPrompt
 from vllm.model_executor.layers.linear import ColumnParallelLinear, RowParallelLinear
 from vllm.model_executor.layers.quantization import QuantizationConfig
 from vllm.model_executor.models.module_mapping import MultiModelKeys
@@ -879,11 +879,11 @@ class GraniteSpeechForConditionalGeneration(
         )
         prompt_token_ids = tokenizer.encode(prompt)
-        prompt = {
-            "prompt_token_ids": prompt_token_ids,
-            "multi_modal_data": {"audio": audio},
-        }
-        return cast(PromptType, prompt)
+        return TokensPrompt(
+            prompt_token_ids=prompt_token_ids,
+            multi_modal_data={"audio": audio},
+        )

     # Adapted from https://github.com/huggingface/transformers/blob/v4.56.0/src/transformers/models/granite_speech/feature_extraction_granite_speech.py#L122 # noqa: E501
     @classmethod
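
The diff swaps an untyped dict plus cast() for direct construction of TokensPrompt, vLLM's TypedDict for pre-tokenized prompts, which is why the cast import is dropped in the first hunk. The sketch below illustrates what the change buys from a static-typing perspective; the TokensPrompt defined here is a simplified stand-in for vllm.inputs.data.TokensPrompt (the real TypedDict carries additional optional fields), not the actual definition.

    from typing import NotRequired, TypedDict, cast

    # Simplified stand-in for vllm.inputs.data.TokensPrompt (assumed shape
    # for illustration; the real class has more optional fields).
    class TokensPrompt(TypedDict):
        prompt_token_ids: list[int]
        multi_modal_data: NotRequired[dict]

    # Before: cast() is a no-op at runtime and tells the type checker to
    # accept the dict as-is, so a misspelled or mistyped key slips through.
    before = cast(TokensPrompt, {
        "prompt_token_ids": [1, 2, 3],
        "multi_modal_data": {"audio": b"\x00\x01"},
    })

    # After: constructing the TypedDict directly lets mypy/pyright check
    # every key and value type at the call site, with no cast needed.
    after = TokensPrompt(
        prompt_token_ids=[1, 2, 3],
        multi_modal_data={"audio": b"\x00\x01"},
    )

Both forms produce a plain dict at runtime; the difference is purely that the second version gives the type checker a concrete construction site to verify, rather than an assertion to trust.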