[Feature] Batch-Invariant Support for FA2 and LoRA (#30018)

Signed-off-by: quanliu <18646313696@163.com>
Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
This commit is contained in:
quanliu
2025-12-09 23:01:38 +08:00
committed by GitHub
parent 5c213d2899
commit 5dcd593baf
3 changed files with 23 additions and 3 deletions

View File

@@ -11,8 +11,10 @@ from vllm.platforms import current_platform
from vllm.utils.flashinfer import has_flashinfer
skip_unsupported = pytest.mark.skipif(
-    not (current_platform.is_cuda() and current_platform.has_device_capability(90)),
-    reason="Requires CUDA and >= Hopper (SM90)",
+    not (current_platform.is_cuda() and current_platform.has_device_capability(80)),
+    # Supports testing on Ampere and Ada Lovelace devices.
+    # Note: For devices with SM < 90, batch invariance does not support CUDA Graphs.
+    reason="Requires CUDA and >= Ampere (SM80)",
)
BACKENDS: list[str] = [
@@ -97,3 +99,7 @@ def _extract_step_logprobs(request_output):
return t, inner.token_ids
return None, None
def is_device_capability_below_90() -> bool:
    """Return True when the current device's compute capability is below SM90.

    Used by tests to branch on pre-Hopper hardware (e.g. Ampere/Ada), where
    batch invariance does not support CUDA Graphs per the skip marker above.
    """
    meets_sm90 = current_platform.has_device_capability(90)
    return not meets_sm90