[CI/Build] Reorganize models tests (#7820)

2024-09-14 01:20:06 +08:00
parent 0a4806f0a9
commit a84e598e21
55 changed files with 415 additions and 498 deletions
--- a/tests/models/decoder_only/vision_language/test_chameleon.py
+++ b/tests/models/decoder_only/vision_language/test_chameleon.py
@@ -0,0 +1,125 @@
+from typing import List, Optional, Type
+
+import pytest
+from transformers import AutoModelForVision2Seq, BatchEncoding
+
+from vllm.multimodal.utils import rescale_image_size
+from vllm.utils import STR_DTYPE_TO_TORCH_DTYPE
+
+from ....conftest import IMAGE_ASSETS, HfRunner, VllmRunner, _ImageAssets
+from ...utils import check_outputs_equal
+
+HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
+    "stop_sign":
+    "USER: <image>\nWhat's the content of the image?\nASSISTANT:",
+    "cherry_blossom":
+    "USER: <image>\nWhat is the season?\nASSISTANT:",
+})
+
+models = ["facebook/chameleon-7b"]
+
+
+def run_test(
+    hf_runner: Type[HfRunner],
+    vllm_runner: Type[VllmRunner],
+    image_assets: _ImageAssets,
+    model: str,
+    *,
+    size_factors: List[float],
+    dtype: str,
+    max_tokens: int,
+    num_logprobs: int,
+    tensor_parallel_size: int,
+    distributed_executor_backend: Optional[str] = None,
+):
+    """Inference result should be the same between hf and vllm.
+
+    All the image fixtures for the test are from IMAGE_ASSETS.
+    For huggingface runner, we provide the PIL images as input.
+    For vllm runner, we provide MultiModalDataDict objects 
+    and corresponding vision language config as input.
+    Note, the text input is also adjusted to abide by vllm contract.
+    The text output is sanitized to be able to compare with hf.
+    """
+    torch_dtype = STR_DTYPE_TO_TORCH_DTYPE[dtype]
+    images = [asset.pil_image for asset in image_assets]
+
+    inputs_per_image = [(
+        [prompt for _ in size_factors],
+        [rescale_image_size(image, factor) for factor in size_factors],
+    ) for image, prompt in zip(images, HF_IMAGE_PROMPTS)]
+
+    with vllm_runner(model,
+                     max_model_len=4096,
+                     dtype=dtype,
+                     tensor_parallel_size=tensor_parallel_size,
+                     distributed_executor_backend=distributed_executor_backend,
+                     enforce_eager=True) as vllm_model:
+
+        vllm_outputs_per_image = [
+            vllm_model.generate_greedy_logprobs(prompts,
+                                                max_tokens,
+                                                num_logprobs=num_logprobs,
+                                                images=images)
+            for prompts, images in inputs_per_image
+        ]
+
+    def process(hf_inputs: BatchEncoding):
+        hf_inputs["pixel_values"] = hf_inputs["pixel_values"] \
+            .to(torch_dtype)  # type: ignore
+        return hf_inputs
+
+    with hf_runner(model,
+                   dtype=dtype,
+                   postprocess_inputs=process,
+                   auto_cls=AutoModelForVision2Seq) as hf_model:
+        hf_outputs_per_image = [
+            hf_model.generate_greedy_logprobs_limit(prompts,
+                                                    max_tokens,
+                                                    num_logprobs=num_logprobs,
+                                                    images=images)
+            for prompts, images in inputs_per_image
+        ]
+
+    for hf_outputs, vllm_outputs in zip(hf_outputs_per_image,
+                                        vllm_outputs_per_image):
+        # HF Logprobs include image tokens, unlike vLLM, so we don't directly
+        # compare them
+        check_outputs_equal(
+            outputs_0_lst=[outputs[:2] for outputs in hf_outputs],
+            outputs_1_lst=[outputs[:2] for outputs in vllm_outputs],
+            name_0="hf",
+            name_1="vllm",
+        )
+
+
+@pytest.mark.parametrize("model", models)
+@pytest.mark.parametrize(
+    "size_factors",
+    [
+        # No image
+        [],
+        # Single-scale
+        [1.0],
+        # Single-scale, batched
+        [1.0, 1.0, 1.0],
+        # Multi-scale
+        [0.25, 0.5, 1.0],
+    ],
+)
+@pytest.mark.parametrize("dtype", ["bfloat16"])
+@pytest.mark.parametrize("max_tokens", [8])
+@pytest.mark.parametrize("num_logprobs", [5])
+def test_models(hf_runner, vllm_runner, image_assets, model, size_factors,
+                dtype, max_tokens, num_logprobs) -> None:
+    run_test(
+        hf_runner,
+        vllm_runner,
+        image_assets,
+        model,
+        size_factors=size_factors,
+        dtype=dtype,
+        max_tokens=max_tokens,
+        num_logprobs=num_logprobs,
+        tensor_parallel_size=1,
+    )