VLLM_USE_TRITON_FLASH_ATTN V0 variable deprecation (#27611)
Signed-off-by: Andreas Karatzas <akaratza@amd.com> Signed-off-by: Andreas Karatzas <Andreas.Karatzas@amd.com>
This commit is contained in:
@@ -8,7 +8,6 @@ See also `tests/kernels/moe/test_ocp_mx_moe.py`.
|
||||
"""
|
||||
|
||||
import importlib.metadata
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
from importlib.util import find_spec
|
||||
|
||||
@@ -246,8 +245,6 @@ def test_mxfp4_gsm8k_correctness(config: AccuracyTestConfig):
|
||||
task = "gsm8k"
|
||||
rtol = 0.03
|
||||
|
||||
os.environ["VLLM_USE_TRITON_FLASH_ATTN"] = "0"
|
||||
|
||||
results = lm_eval.simple_evaluate(
|
||||
model="vllm",
|
||||
model_args=config.get_model_args(tp_size=8, model_max_len=38768),
|
||||
@@ -263,8 +260,6 @@ def test_mxfp4_gsm8k_correctness(config: AccuracyTestConfig):
|
||||
and measured_value + rtol > EXPECTED_VALUE
|
||||
), f"Expected: {EXPECTED_VALUE} | Measured: {measured_value}"
|
||||
|
||||
del os.environ["VLLM_USE_TRITON_FLASH_ATTN"]
|
||||
|
||||
|
||||
@pytest.mark.skipif(not QUARK_MXFP4_AVAILABLE, reason="amd-quark>=0.9 is not available")
|
||||
@pytest.mark.parametrize("float_dtype", [torch.bfloat16, torch.float16])
|
||||
|
||||
Reference in New Issue
Block a user