Revert "[v1] Add fp32 support to v1 engine through flex attn" (#19404)

Author: Isotr0py
Date:   2025-06-10 16:30:20 +08:00
Committed by: GitHub
parent 9368cc90b2
commit 5f1ac1e1d1
4 changed files with 7 additions and 38 deletions


@@ -1337,6 +1337,13 @@ class EngineArgs:
                                recommend_to_remove=False)
             return False
 
+        # Only Fp16 and Bf16 dtypes since we only support FA.
+        V1_SUPPORTED_DTYPES = [torch.bfloat16, torch.float16]
+        if model_config.dtype not in V1_SUPPORTED_DTYPES:
+            _raise_or_fallback(feature_name=f"--dtype {model_config.dtype}",
+                               recommend_to_remove=False)
+            return False
+
         # No Embedding Models so far.
         if model_config.task not in ["generate"]:
             _raise_or_fallback(feature_name=f"--task {model_config.task}",