Revert "[v1] Add fp32 support to v1 engine through flex attn" (#19404)

2025-06-10 16:30:20 +08:00
parent 9368cc90b2
commit 5f1ac1e1d1
4 changed files with 7 additions and 38 deletions
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -1337,6 +1337,13 @@ class EngineArgs:
                               recommend_to_remove=False)
            return False

+        # Only fp16 and bf16 are supported, since V1 only supports FA.
+        V1_SUPPORTED_DTYPES = [torch.bfloat16, torch.float16]
+        if model_config.dtype not in V1_SUPPORTED_DTYPES:
+            _raise_or_fallback(feature_name=f"--dtype {model_config.dtype}",
+                               recommend_to_remove=False)
+            return False
+
        # No Embedding Models so far.
        if model_config.task not in ["generate"]:
            _raise_or_fallback(feature_name=f"--task {model_config.task}",