[Renderer] Separate out RendererConfig from ModelConfig (#30145)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2025-12-07 15:15:42 +08:00
committed by GitHub
parent a49d813fa8
commit 27f4c2fd46
105 changed files with 969 additions and 797 deletions

View File

@@ -18,7 +18,7 @@ from transformers.models.gemma3n import (
)
from transformers.models.siglip import SiglipImageProcessorFast
-from vllm.config import ModelConfig, SpeechToTextConfig, VllmConfig
+from vllm.config import RendererConfig, SpeechToTextConfig, VllmConfig
from vllm.config.multimodal import BaseDummyOptions
from vllm.inputs.data import PromptType
from vllm.logger import init_logger
@@ -760,7 +760,7 @@ class Gemma3nForConditionalGeneration(
cls,
audio: np.ndarray,
stt_config: SpeechToTextConfig,
-model_config: ModelConfig,
+renderer_config: RendererConfig,
language: Optional[str],
task_type: Literal["transcribe", "translate"],
request_prompt: str,
@@ -798,7 +798,9 @@ class Gemma3nForConditionalGeneration(
@classmethod
def get_speech_to_text_config(
-cls, model_config: ModelConfig, task_type: str
+cls,
+renderer_config: RendererConfig,
+task_type: str,
) -> SpeechToTextConfig:
return SpeechToTextConfig(
# Let's set this to 30 as suggested in the docs for now, although