[CI/Build] Reuse code for checking output consistency (#5988)

This commit is contained in:
Cyrus Leung
2024-06-30 11:44:25 +08:00
committed by GitHub
parent bcc6a09b63
commit cff6a1fec1
11 changed files with 125 additions and 75 deletions

View File

@@ -12,6 +12,8 @@ from vllm import SamplingParams
 from vllm.core.scheduler import (ARTIFICIAL_PREEMPTION_MAX_CNT,
 ENABLE_ARTIFICIAL_PREEMPT)
+from ..models.utils import check_outputs_equal
 MODELS = [
 "facebook/opt-125m",
 ]
@@ -94,13 +96,13 @@ def test_preemption(
 total_preemption = (
 vllm_model.model.llm_engine.scheduler.num_cumulative_preemption)
-for i in range(len(example_prompts)):
-hf_output_ids, hf_output_str = hf_outputs[i]
-vllm_output_ids, vllm_output_str = vllm_outputs[i]
-assert hf_output_str == vllm_output_str, (
-f"Test{i}:\nHF: {hf_output_str!r}\nvLLM: {vllm_output_str!r}")
-assert hf_output_ids == vllm_output_ids, (
-f"Test{i}:\nHF: {hf_output_ids}\nvLLM: {vllm_output_ids}")
+check_outputs_equal(
+outputs_0_lst=hf_outputs,
+outputs_1_lst=vllm_outputs,
+name_0="hf",
+name_1="vllm",
+)
 assert ("is preempted by PreemptionMode.RECOMPUTE mode because there "
 "is not enough KV cache space." in caplog_vllm.text)
 # Ensure the count bucket of request-level histogram metrics matches