[Bugfix] Fix Nemotron Parse loading (#37407)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -1,21 +1,53 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
from collections.abc import Sequence
|
||||
from collections.abc import Iterable, Sequence
|
||||
|
||||
import pytest
|
||||
import regex as re
|
||||
from transformers import AutoModel
|
||||
|
||||
from tests.models.utils import check_logprobs_close
|
||||
from vllm.assets.image import ImageAsset
|
||||
from vllm.logprobs import Logprob, SampleLogprobs
|
||||
from vllm.tokenizers import TokenizerLike
|
||||
|
||||
from ....conftest import HfRunner, PromptImageInput, VllmRunner
|
||||
from ....utils import create_new_process_for_each_test
|
||||
|
||||
IMAGE = ImageAsset("paper-11").pil_image_ext(ext="png").convert("RGB")
|
||||
PROMPT = "</s><s><predict_bbox><predict_classes><output_markdown>"
|
||||
|
||||
|
||||
class DummyLogprobs(dict[int, Logprob]):
    """Placeholder logprob table mapping every vocab id to ``Logprob(0.0)``.

    Used to make ``check_logprobs_close`` unconditionally accept a position:
    with all ids present at equal (zero) logprob, any sampled token is
    considered "close".
    """

    def __init__(self, vocab_ids: Iterable[int]):
        # One shared Logprob instance is fine here: the entries are
        # read-only sentinels, never mutated per key.
        placeholder = Logprob(0.0)
        super().__init__({vocab_id: placeholder for vocab_id in vocab_ids})

    def __repr__(self):
        # Avoid dumping the full vocab-sized mapping in test failure output.
        return "DummyLogprobs()"
||||
def mask_bbox_tokens(
    output: tuple[list[int], str, SampleLogprobs],
    tokenizer: TokenizerLike,
) -> tuple[list[int], str, SampleLogprobs]:
    """
    Always pass check_logprobs_close check for bounding box tokens
    because it is reasonable for them to differ slightly.
    """
    # Coordinate tokens look like "<x_12.3>" / "<y_0.5>"; their exact values
    # may legitimately drift between HF and vLLM, so they get a permissive
    # DummyLogprobs entry instead of the real distribution.
    bbox_token = re.compile(r"<[xy]_[\d.]+>")
    # get_vocab() maps token string -> id; the ids are the logprob keys.
    vocab_ids = tokenizer.get_vocab().values()

    token_ids, text, logprobs_seq = output

    masked: list[dict[int, Logprob]] = [
        DummyLogprobs(vocab_ids)
        if bbox_token.match(tokenizer.decode(token_id))
        else token_logprobs
        for token_id, token_logprobs in zip(token_ids, logprobs_seq)
    ]

    return token_ids, text, masked
||||
def run_test(
|
||||
hf_runner: type[HfRunner],
|
||||
vllm_runner: type[VllmRunner],
|
||||
@@ -44,6 +76,8 @@ def run_test(
|
||||
for prompts, images in inputs
|
||||
]
|
||||
|
||||
tokenizer = vllm_model.llm.get_tokenizer()
|
||||
|
||||
with hf_runner(model, dtype=dtype, auto_cls=AutoModel) as hf_model:
|
||||
hf_outputs_per_case = [
|
||||
hf_model.generate_greedy_logprobs_limit(
|
||||
@@ -58,18 +92,20 @@ def run_test(
|
||||
|
||||
for hf_outputs, vllm_outputs in zip(hf_outputs_per_case, vllm_outputs_per_case):
|
||||
check_logprobs_close(
|
||||
outputs_0_lst=hf_outputs,
|
||||
outputs_1_lst=vllm_outputs,
|
||||
outputs_0_lst=[
|
||||
mask_bbox_tokens(output, tokenizer) for output in hf_outputs
|
||||
],
|
||||
outputs_1_lst=[
|
||||
mask_bbox_tokens(output, tokenizer) for output in vllm_outputs
|
||||
],
|
||||
name_0="hf",
|
||||
name_1="vllm",
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.core_model
|
||||
@pytest.mark.parametrize("model", ["nvidia/NVIDIA-Nemotron-Parse-v1.1"])
|
||||
@pytest.mark.parametrize("dtype", ["bfloat16"])
|
||||
@pytest.mark.parametrize("num_logprobs", [5])
|
||||
@create_new_process_for_each_test("spawn")
|
||||
def test_models(
|
||||
hf_runner, vllm_runner, model: str, dtype: str, num_logprobs: int
|
||||
) -> None:
|
||||
@@ -77,10 +113,7 @@ def test_models(
|
||||
hf_runner,
|
||||
vllm_runner,
|
||||
inputs=[
|
||||
(
|
||||
[PROMPT] * 10,
|
||||
[IMAGE] * 10,
|
||||
),
|
||||
([PROMPT] * 10, [IMAGE] * 10),
|
||||
],
|
||||
model=model,
|
||||
dtype=dtype,
|
||||
|
||||
Reference in New Issue
Block a user