[Bugfix] Fix Nemotron Parse loading (#37407)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2026-03-19 17:55:29 +08:00
committed by GitHub
parent 6a9cceb219
commit 765e461065
4 changed files with 49 additions and 19 deletions

View File

@@ -1,21 +1,53 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from collections.abc import Sequence
from collections.abc import Iterable, Sequence
import pytest
import regex as re
from transformers import AutoModel
from tests.models.utils import check_logprobs_close
from vllm.assets.image import ImageAsset
from vllm.logprobs import Logprob, SampleLogprobs
from vllm.tokenizers import TokenizerLike
from ....conftest import HfRunner, PromptImageInput, VllmRunner
from ....utils import create_new_process_for_each_test
# Shared test fixture: the "paper-11" asset decoded via PNG and normalized to RGB.
IMAGE = ImageAsset("paper-11").pil_image_ext(ext="png").convert("RGB")
# Nemotron-Parse prompt; the tags presumably request bbox prediction, class
# prediction, and markdown output — NOTE(review): confirm against the model card.
PROMPT = "</s><s><predict_bbox><predict_classes><output_markdown>"
class DummyLogprobs(dict[int, Logprob]):
    """A logprobs mapping that assigns a zero logprob to every vocab id.

    Substituted at positions whose tokens should always be accepted by
    ``check_logprobs_close``.
    """

    def __init__(self, vocab_ids: Iterable[int]):
        # One shared zero-valued Logprob for all ids, matching the
        # single-object semantics of ``dict.fromkeys``.
        zero = Logprob(0.0)
        super().__init__({vocab_id: zero for vocab_id in vocab_ids})

    def __repr__(self):
        return "DummyLogprobs()"
def mask_bbox_tokens(
    output: tuple[list[int], str, SampleLogprobs],
    tokenizer: TokenizerLike,
) -> tuple[list[int], str, SampleLogprobs]:
    """
    Always pass check_logprobs_close check for bounding box tokens
    because it is reasonable for them to differ slightly.
    """
    ignore_pattern = r"<[xy]_[\d.]+>"
    token_ids, text, logprobs_seq = output
    # All vocab ids, hoisted once; used to build the permissive dummy entries.
    vocab_ids = tokenizer.get_vocab().values()
    masked = list[dict[int, Logprob]]()
    for token_id, token_logprobs in zip(token_ids, logprobs_seq):
        # Coordinate tokens like <x_12.3> / <y_4.56> get an always-pass entry;
        # everything else keeps its real logprobs.
        looks_like_bbox = re.match(ignore_pattern, tokenizer.decode(token_id))
        masked.append(
            DummyLogprobs(vocab_ids) if looks_like_bbox else token_logprobs
        )
    return token_ids, text, masked
def run_test(
hf_runner: type[HfRunner],
vllm_runner: type[VllmRunner],
@@ -44,6 +76,8 @@ def run_test(
for prompts, images in inputs
]
tokenizer = vllm_model.llm.get_tokenizer()
with hf_runner(model, dtype=dtype, auto_cls=AutoModel) as hf_model:
hf_outputs_per_case = [
hf_model.generate_greedy_logprobs_limit(
@@ -58,18 +92,20 @@ def run_test(
for hf_outputs, vllm_outputs in zip(hf_outputs_per_case, vllm_outputs_per_case):
check_logprobs_close(
outputs_0_lst=hf_outputs,
outputs_1_lst=vllm_outputs,
outputs_0_lst=[
mask_bbox_tokens(output, tokenizer) for output in hf_outputs
],
outputs_1_lst=[
mask_bbox_tokens(output, tokenizer) for output in vllm_outputs
],
name_0="hf",
name_1="vllm",
)
@pytest.mark.core_model
@pytest.mark.parametrize("model", ["nvidia/NVIDIA-Nemotron-Parse-v1.1"])
@pytest.mark.parametrize("dtype", ["bfloat16"])
@pytest.mark.parametrize("num_logprobs", [5])
@create_new_process_for_each_test("spawn")
def test_models(
hf_runner, vllm_runner, model: str, dtype: str, num_logprobs: int
) -> None:
@@ -77,10 +113,7 @@ def test_models(
hf_runner,
vllm_runner,
inputs=[
(
[PROMPT] * 10,
[IMAGE] * 10,
),
([PROMPT] * 10, [IMAGE] * 10),
],
model=model,
dtype=dtype,