[V0 Deprecation] Remove V0 FlashInfer attention backend (#22776)
Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
This commit is contained in:
@@ -32,7 +32,7 @@ from ..utils import check_logprobs_close
|
||||
# Due to low-precision numerical divergence, we only test logprob of 4 tokens
|
||||
@pytest.mark.parametrize("max_tokens", [4])
|
||||
@pytest.mark.parametrize("enforce_eager", [True])
|
||||
@pytest.mark.parametrize("backend", ["FLASH_ATTN", "XFORMERS", "FLASHINFER"])
|
||||
@pytest.mark.parametrize("backend", ["FLASH_ATTN", "XFORMERS"])
|
||||
# NOTE: Increasing this in this suite will fail CI because we currently cannot
|
||||
# reset distributed env properly. Use a value > 1 just when you test.
|
||||
@pytest.mark.parametrize("tensor_parallel_size", [1])
|
||||
@@ -57,9 +57,6 @@ def test_models(
|
||||
numerical sensitive kernels.
|
||||
"""
|
||||
|
||||
if backend == "FLASHINFER" and current_platform.is_rocm():
|
||||
pytest.skip("Flashinfer does not support ROCm/HIP.")
|
||||
|
||||
if kv_cache_dtype == "fp8_e5m2" and current_platform.is_rocm():
|
||||
pytest.skip(
|
||||
f"{kv_cache_dtype} is currently not supported on ROCm/HIP.")
|
||||
|
||||
Reference in New Issue
Block a user