[CI/Build] Add Model Tests for Qwen2-VL (#9846)

Signed-off-by: Alex-Brooks <Alex.Brooks@ibm.com>
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
Co-authored-by: DarkLight1337 <tlleungac@connect.ust.hk>
2024-10-31 10:10:52 -06:00
parent 5608e611c2
commit 16b8f7a86f
9 changed files with 106 additions and 52 deletions
--- a/tests/models/decoder_only/vision_language/vlm_utils/model_utils.py
+++ b/tests/models/decoder_only/vision_language/vlm_utils/model_utils.py
@@ -56,6 +56,17 @@ def qwen_vllm_to_hf_output(
    return output_ids, hf_output_str, out_logprobs


+def qwen2_vllm_to_hf_output(
+        vllm_output: RunnerOutput,
+        model: str) -> Tuple[List[int], str, Optional[SampleLogprobs]]:
+    """Append "<|im_end|>" to qwen2 vLLM text for comparison with HF."""
+    output_ids, output_str, out_logprobs = vllm_output
+    # Only the text changes; ids and logprobs pass through as-is.
+    hf_output_str = output_str + "<|im_end|>"
+
+    return output_ids, hf_output_str, out_logprobs
+
+
 def llava_image_vllm_to_hf_output(vllm_output: RunnerOutput,
                                  model: str) -> RunnerOutput:
    config = AutoConfig.from_pretrained(model)
--- a/tests/models/decoder_only/vision_language/vlm_utils/runners.py
+++ b/tests/models/decoder_only/vision_language/vlm_utils/runners.py
@@ -29,6 +29,7 @@ def run_single_image_test(*, tmp_path: PosixPath, model_test_info: VLMTestInfo,
        num_logprobs=test_case.num_logprobs,
        limit_mm_per_prompt={"image": 1},
        distributed_executor_backend=test_case.distributed_executor_backend,
+        runner_mm_key="images",
        **model_test_info.get_non_parametrized_runner_kwargs())


@@ -51,6 +52,7 @@ def run_multi_image_test(*, tmp_path: PosixPath, model_test_info: VLMTestInfo,
        num_logprobs=test_case.num_logprobs,
        limit_mm_per_prompt={"image": len(image_assets)},
        distributed_executor_backend=test_case.distributed_executor_backend,
+        runner_mm_key="images",
        **model_test_info.get_non_parametrized_runner_kwargs())


@@ -74,6 +76,7 @@ def run_embedding_test(*, model_test_info: VLMTestInfo,
        limit_mm_per_prompt={"image": 1},
        vllm_embeddings=vllm_embeddings,
        distributed_executor_backend=test_case.distributed_executor_backend,
+        runner_mm_key="images",
        **model_test_info.get_non_parametrized_runner_kwargs())


@@ -101,6 +104,7 @@ def run_video_test(
        num_logprobs=test_case.num_logprobs,
        limit_mm_per_prompt={"video": len(video_assets)},
        distributed_executor_backend=test_case.distributed_executor_backend,
+        runner_mm_key="videos",
        **model_test_info.get_non_parametrized_runner_kwargs())


@@ -115,7 +119,11 @@ def run_custom_inputs_test(*, model_test_info: VLMTestInfo,

    inputs = test_case.custom_test_opts.inputs
    limit_mm_per_prompt = test_case.custom_test_opts.limit_mm_per_prompt
-    assert inputs is not None and limit_mm_per_prompt is not None
+    runner_mm_key = test_case.custom_test_opts.runner_mm_key
+    # Inputs, limit_mm_per_prompt, and runner_mm_key should all be set
+    assert inputs is not None
+    assert limit_mm_per_prompt is not None
+    assert runner_mm_key is not None

    core.run_test(
        hf_runner=hf_runner,
@@ -127,4 +135,5 @@ def run_custom_inputs_test(*, model_test_info: VLMTestInfo,
        num_logprobs=test_case.num_logprobs,
        limit_mm_per_prompt=limit_mm_per_prompt,
        distributed_executor_backend=test_case.distributed_executor_backend,
+        runner_mm_key=runner_mm_key,
        **model_test_info.get_non_parametrized_runner_kwargs())
--- a/tests/models/decoder_only/vision_language/vlm_utils/types.py
+++ b/tests/models/decoder_only/vision_language/vlm_utils/types.py
@@ -52,6 +52,8 @@ class SizeType(Enum):
 class CustomTestOptions(NamedTuple):
    inputs: List[Tuple[List[str], List[Union[List[Image], Image]]]]
    limit_mm_per_prompt: Dict[str, int]
+    # Name of the kwarg used to pass multimodal data to vllm/hf runners.
+    runner_mm_key: str = "images"


 class ImageSizeWrapper(NamedTuple):
@@ -141,9 +143,6 @@ class VLMTestInfo(NamedTuple):
        Callable[[PosixPath, str, Union[List[ImageAsset], _ImageAssets]],
                 str]] = None  # noqa: E501

-    # kwarg to pass multimodal data in as to vllm/hf runner instances
-    runner_mm_key: str = "images"
-
    # Allows configuring a test to run with custom inputs
    custom_test_opts: Optional[List[CustomTestOptions]] = None

@@ -168,7 +167,6 @@ class VLMTestInfo(NamedTuple):
            "get_stop_token_ids": self.get_stop_token_ids,
            "model_kwargs": self.model_kwargs,
            "patch_hf_runner": self.patch_hf_runner,
-            "runner_mm_key": self.runner_mm_key,
        }