[CI][Pooling] Stabilize ModernBERT test (#32909)
Signed-off-by: Andreas Karatzas <akaratza@amd.com>
This commit is contained in:
@@ -1,5 +1,8 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
import random
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
import torch
|
||||
from transformers import AutoModelForTokenClassification
|
||||
@@ -8,6 +11,20 @@ from tests.models.utils import softmax
|
||||
from vllm.platforms import current_platform
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def seed_everything():
    """Pin every RNG source to a fixed seed so each test run is reproducible."""
    SEED = 0
    # Python, NumPy and PyTorch each keep their own RNG state — seed all three.
    random.seed(SEED)
    np.random.seed(SEED)
    torch.manual_seed(SEED)
    if torch.cuda.is_available():
        # Seed every visible CUDA device and force cuDNN into its
        # deterministic, non-autotuning mode so kernel selection is stable.
        torch.cuda.manual_seed_all(SEED)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
    yield
|
||||
|
||||
|
||||
@pytest.mark.parametrize("model", ["boltuix/NeuroBERT-NER"])
|
||||
# The float32 is required for this tiny model to pass the test.
|
||||
@pytest.mark.parametrize("dtype", ["float"])
|
||||
@@ -51,6 +68,7 @@ def test_bert_models(
|
||||
|
||||
@pytest.mark.parametrize("model", ["disham993/electrical-ner-ModernBERT-base"])
|
||||
@pytest.mark.parametrize("dtype", ["float"])
|
||||
@pytest.mark.flaky(reruns=3)
|
||||
@torch.inference_mode
|
||||
def test_modernbert_models(
|
||||
hf_runner,
|
||||
@@ -59,6 +77,15 @@ def test_modernbert_models(
|
||||
model: str,
|
||||
dtype: str,
|
||||
) -> None:
|
||||
# NOTE: https://github.com/vllm-project/vllm/pull/32403
|
||||
# `disham993/electrical-ner-ModernBERT-base` is a randomly initialized
|
||||
# model, which can cause numerical precision variance and edge cases.
|
||||
# We use @flaky(reruns=3) to mitigate intermittent failures.
|
||||
print(
|
||||
f"\n[NOTE] Testing {model} (randomly initialized weights) - "
|
||||
"flaky tolerance enabled due to numerical precision variance."
|
||||
)
|
||||
|
||||
with vllm_runner(model, max_model_len=None, dtype=dtype) as vllm_model:
|
||||
vllm_outputs = vllm_model.token_classify(example_prompts)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user