[CI][DeepSeek] Add nightly DeepSeek R1 lm_eval tests on H200 (#30356)

Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
Co-authored-by: Michael Goin <mgoin64@gmail.com>
This commit is contained in:
Matthew Bonanni
2026-01-05 17:17:59 -05:00
committed by GitHub
parent 32f4e4db00
commit 276e03b92c
5 changed files with 33 additions and 1 deletion

View File

@@ -1351,6 +1351,14 @@ steps:
- CUDA_VISIBLE_DEVICES=1,2 VLLM_USE_DEEP_GEMM=1 VLLM_LOGGING_LEVEL=DEBUG python3 examples/offline_inference/data_parallel.py --model=Qwen/Qwen1.5-MoE-A2.7B -tp=1 -dp=2 --max-model-len=2048 --all2all-backend=deepep_high_throughput
- pytest -v -s tests/v1/distributed/test_dbo.py
# Optional nightly lm_eval job on an 8-GPU H200 node; the models to
# evaluate are listed in configs/models-h200.txt (one config file per line).
- label: LM Eval Large Models (H200) # optional
  timeout_in_minutes: 60
  gpu: h200
  optional: true
  num_gpus: 8
  commands:
    # Runs GSM8K correctness checks against each config in the list file.
    - pytest -s -v evals/gsm8k/test_gsm8k_correctness.py --config-list-file=configs/models-h200.txt
##### B200 test #####
- label: Distributed Tests (B200) # optional
gpu: b200

View File

@@ -0,0 +1,11 @@
# GSM8K correctness eval config: DeepSeek-R1 served with 8-way data
# parallelism + expert parallelism (DP variant; see the matching TP config).
model_name: "deepseek-ai/DeepSeek-R1"
# Minimum GSM8K accuracy the run must reach to pass.
accuracy_threshold: 0.95
# 1319 questions — presumably the full GSM8K test split; verify against harness.
num_questions: 1319
# Few-shot examples prepended to each prompt.
num_fewshot: 5
# Large model: allow up to 20 min for server startup. Read by the test via
# eval_config.get("startup_max_wait_seconds", 600).
startup_max_wait_seconds: 1200
# Extra vLLM server CLI flags, folded into a single line by YAML ">-".
# NOTE: do not add comments inside this block — they would become part of the string.
server_args: >-
  --enforce-eager
  --max-model-len 4096
  --data-parallel-size 8
  --enable-expert-parallel
  --speculative-config '{"method":"mtp","num_speculative_tokens":1}'

View File

@@ -0,0 +1,11 @@
# GSM8K correctness eval config: DeepSeek-R1 served with 8-way tensor
# parallelism + expert parallelism (TP variant; see the matching DP config).
model_name: "deepseek-ai/DeepSeek-R1"
# Minimum GSM8K accuracy the run must reach to pass.
accuracy_threshold: 0.95
# 1319 questions — presumably the full GSM8K test split; verify against harness.
num_questions: 1319
# Few-shot examples prepended to each prompt.
num_fewshot: 5
# Large model: allow up to 20 min for server startup. Read by the test via
# eval_config.get("startup_max_wait_seconds", 600).
startup_max_wait_seconds: 1200
# Extra vLLM server CLI flags, folded into a single line by YAML ">-".
# NOTE: do not add comments inside this block — they would become part of the string.
server_args: >-
  --enforce-eager
  --max-model-len 4096
  --tensor-parallel-size 8
  --enable-expert-parallel
  --speculative-config '{"method":"mtp","num_speculative_tokens":1}'

View File

@@ -0,0 +1,2 @@
DeepSeek-R1-TP.yaml
DeepSeek-R1-DP.yaml

View File

@@ -78,7 +78,7 @@ def test_gsm8k_correctness(config_filename):
eval_config["model_name"],
server_args,
env_dict=env_dict,
max_wait_seconds=600,
max_wait_seconds=eval_config.get("startup_max_wait_seconds", 600),
) as remote_server:
server_url = remote_server.url_for("v1")
print(f"Server started at: {server_url}")