[v1] Add Whisper model support (encoder-decoder) (#21088)

Signed-off-by: Russell Bryant <rbryant@redhat.com> Co-authored-by: NickLucche <nlucches@redhat.com>
2025-09-10 16:53:35 -04:00
parent 4db4426404
commit 37e8182bfe
31 changed files with 429 additions and 92 deletions
--- a/vllm/v1/attention/backends/rocm_aiter_fa.py
+++ b/vllm/v1/attention/backends/rocm_aiter_fa.py
@@ -236,11 +236,11 @@ class AiterFlashAttentionMetadataBuilder(

    def __init__(self, kv_cache_spec: AttentionSpec, layer_names: list[str],
                 vllm_config: VllmConfig, device: torch.device):
-        self.vllm_config = vllm_config
+        super().__init__(kv_cache_spec, layer_names, vllm_config, device)
+
        self.model_config = vllm_config.model_config
        self.parallel_config = vllm_config.parallel_config
        self.cache_config = vllm_config.cache_config
-        self.device = device

        self.num_heads_q = self.model_config.get_num_attention_heads(
            self.parallel_config)
@@ -248,7 +248,6 @@ class AiterFlashAttentionMetadataBuilder(
            self.parallel_config)
        self.headdim = self.model_config.get_head_size()
        self.block_size = kv_cache_spec.block_size
-        self.kv_cache_spec = kv_cache_spec
        # Sliding window size to be used with the AOT scheduler will be
        # populated on first build() call.
        self.aot_sliding_window: Optional[tuple[int, int]] = None