[Model] Add support for ModernBertForTokenClassification (#26340)

Signed-off-by: Antoine Recanati Le Goat <antoine.recanati@sancare.fr> Signed-off-by: antrec <antoine.recanati@gmail.com> Co-authored-by: Antoine Recanati Le Goat <antoine.recanati@sancare.fr> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
2025-10-07 16:29:19 +02:00
parent 41f1cf38f2
commit 6f59beaf0b
5 changed files with 112 additions and 2 deletions
--- a/tests/models/language/pooling/test_token_classification.py
+++ b/tests/models/language/pooling/test_token_classification.py
@@ -11,7 +11,38 @@ from tests.models.utils import softmax
 # The float32 is required for this tiny model to pass the test.
@pytest.mark.parametrize("dtype", ["float"])
@torch.inference_mode
-def test_models(
+def test_bert_models(
+    hf_runner,
+    vllm_runner,
+    example_prompts,
+    model: str,
+    dtype: str,
+) -> None:
+    with vllm_runner(model, max_model_len=None, dtype=dtype) as vllm_model:
+        vllm_outputs = vllm_model.encode(example_prompts)
+
+    with hf_runner(
+        model, dtype=dtype, auto_cls=AutoModelForTokenClassification
+    ) as hf_model:
+        tokenizer = hf_model.tokenizer
+        hf_outputs = []
+        for prompt in example_prompts:
+            inputs = tokenizer([prompt], return_tensors="pt")
+            inputs = hf_model.wrap_device(inputs)
+            output = hf_model.model(**inputs)
+            hf_outputs.append(softmax(output.logits[0]))
+
+    # check logits difference
+    for hf_output, vllm_output in zip(hf_outputs, vllm_outputs):
+        hf_output = torch.tensor(hf_output).cpu().float()
+        vllm_output = torch.tensor(vllm_output).cpu().float()
+        assert torch.allclose(hf_output, vllm_output, 1e-2)
+
+
+@pytest.mark.parametrize("model", ["disham993/electrical-ner-ModernBERT-base"])
+@pytest.mark.parametrize("dtype", ["float"])
+@torch.inference_mode
+def test_modernbert_models(
    hf_runner,
    vllm_runner,
    example_prompts,