[Renderer] Separate out RendererConfig from ModelConfig (#30145)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
2025-12-07 15:15:42 +08:00
parent a49d813fa8
commit 27f4c2fd46
105 changed files with 969 additions and 797 deletions
--- a/vllm/model_executor/models/gemma3n_mm.py
+++ b/vllm/model_executor/models/gemma3n_mm.py
@@ -18,7 +18,7 @@ from transformers.models.gemma3n import (
 )
 from transformers.models.siglip import SiglipImageProcessorFast

-from vllm.config import ModelConfig, SpeechToTextConfig, VllmConfig
+from vllm.config import RendererConfig, SpeechToTextConfig, VllmConfig
 from vllm.config.multimodal import BaseDummyOptions
 from vllm.inputs.data import PromptType
 from vllm.logger import init_logger
@@ -760,7 +760,7 @@ class Gemma3nForConditionalGeneration(
        cls,
        audio: np.ndarray,
        stt_config: SpeechToTextConfig,
-        model_config: ModelConfig,
+        renderer_config: RendererConfig,
        language: Optional[str],
        task_type: Literal["transcribe", "translate"],
        request_prompt: str,
@@ -798,7 +798,9 @@ class Gemma3nForConditionalGeneration(

    @classmethod
    def get_speech_to_text_config(
-        cls, model_config: ModelConfig, task_type: str
+        cls,
+        renderer_config: RendererConfig,
+        task_type: str,
    ) -> SpeechToTextConfig:
        return SpeechToTextConfig(
            # Let's set this to 30 as suggested in the docs for now, although