[Attention] Refactor FA block_size limitations to hybrid models only (#29084)

Signed-off-by: NickLucche <nlucches@redhat.com>
2025-11-22 15:38:44 +01:00
parent 5f7209a793
commit 066209a045
17 changed files with 82 additions and 32 deletions
--- a/tests/v1/worker/test_gpu_model_runner.py
+++ b/tests/v1/worker/test_gpu_model_runner.py
@@ -185,7 +185,9 @@ def _make_mock_backend_for_kernel_block_size(
    supported_sizes: list[int | MultipleOf],
 ):
    class _MockBackend:
-        supported_kernel_block_sizes = supported_sizes
+        @staticmethod
+        def get_supported_kernel_block_sizes():
+            return supported_sizes

    return _MockBackend()