[CI] Add Blackwell AsyncTP correctness test (#35871)
Signed-off-by: Stefano Castagnetta <scastagnetta@nvidia.com>
This commit is contained in:
committed by
GitHub
parent
417fd28fb1
commit
d7166e74c1
@@ -36,6 +36,16 @@ steps:
|
||||
- export VLLM_TEST_CLEAN_GPU_MEMORY=1
|
||||
- pytest -v -s tests/compile/correctness_e2e/test_async_tp.py
|
||||
|
||||
- label: AsyncTP Correctness Tests (B200)
|
||||
timeout_in_minutes: 50
|
||||
working_dir: "/vllm-workspace/"
|
||||
device: b200
|
||||
optional: true
|
||||
num_devices: 2
|
||||
commands:
|
||||
- export VLLM_TEST_CLEAN_GPU_MEMORY=1
|
||||
- pytest -v -s tests/compile/correctness_e2e/test_async_tp.py
|
||||
|
||||
- label: Distributed Compile Unit Tests (2xH100)
|
||||
timeout_in_minutes: 20
|
||||
working_dir: "/vllm-workspace/"
|
||||
|
||||
@@ -31,7 +31,12 @@ def test_async_tp_pass_correctness(
|
||||
distributed_backend: str,
|
||||
eager_mode: bool,
|
||||
num_gpus_available: int,
|
||||
monkeypatch,
|
||||
):
|
||||
# Disable FlashInfer FP8 scaled_mm kernel as it is incompatible with
|
||||
# async TP patterns. This is a no-op on H100, because the kernel requires compute capability (CC) >= 100.
|
||||
monkeypatch.setenv("VLLM_DISABLED_KERNELS", "FlashInferFP8ScaledMMLinearKernel")
|
||||
|
||||
model_info = HF_EXAMPLE_MODELS.find_hf_info(model_id)
|
||||
model_info.check_transformers_version(on_fail="skip")
|
||||
model_info.check_available_online(on_fail="skip")
|
||||
|
||||
Reference in New Issue
Block a user