[Bugfix] Enable Kimi-K2.5 processor test (#33562)

Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
2026-02-02 22:25:25 +08:00
parent 0aca8b8c62
commit 4061dcf4c5
4 changed files with 96 additions and 12 deletions
--- a/tests/models/multimodal/processing/test_common.py
+++ b/tests/models/multimodal/processing/test_common.py
@@ -214,6 +214,28 @@ def get_text_token_prompts(
    return text_prompt, token_prompt


+def random_vision_chunk(
+    rng: np.random.RandomState,
+    min_wh: int,
+    max_wh: int,
+    min_frames: int,
+    max_frames: int,
+) -> dict:
+    """Build a random vision chunk: one image or a stack of video frames.
+
+    Args:
+        rng: Random state that every draw is taken from.
+        min_wh: Smallest side length in pixels (inclusive).
+        max_wh: Largest side length in pixels (inclusive).
+        min_frames: Smallest frame count (inclusive).
+        max_frames: Largest frame count (inclusive).
+
+    Returns:
+        ``{"type": "image", "image": ...}`` when exactly one frame is drawn,
+        otherwise ``{"type": "video_chunk", "video_chunk": array}`` where
+        ``array`` is uint8 with shape ``(num_frames, wh, wh, 3)``.
+    """
+    num_frames = rng.randint(min_frames, max_frames + 1)
+    # Draw the side length once per chunk. Sampling it per frame produced
+    # frames of different shapes, which np.stack rejects with a ValueError.
+    wh = rng.randint(min_wh, max_wh + 1)
+    if num_frames == 1:
+        # Single image chunk. The (wh, wh + 1) bounds presumably pin
+        # random_image to a wh x wh image -- confirm against its definition.
+        image = random_image(rng, wh, wh + 1)
+        return {"type": "image", "image": image}
+    # All frames share one shape, so the whole clip is generated in one call.
+    video_array = rng.randint(
+        0, 256, size=(num_frames, wh, wh, 3), dtype=np.uint8
+    )
+    return {"type": "video_chunk", "video_chunk": video_array}
+
+
 def _test_processing_correctness(
    model_id_or_arch: str,
    hit_rate: float,
@@ -291,6 +313,7 @@ def _test_processing_correctness(
        "image": Image.new("RGB", size=(128, 128)),
        "video": np.zeros((4, 128, 128, 3), dtype=np.uint8),
        "audio": (np.zeros((512,)), 16000),
+        "vision_chunk": {"type": "image", "image": Image.new("RGB", size=(128, 128))},
    }
    input_factory = {
        "image": partial(random_image, rng, min_wh=128, max_wh=256),
@@ -298,6 +321,9 @@ def _test_processing_correctness(
            random_video, rng, min_frames=2, max_frames=16, min_wh=128, max_wh=256
        ),
        "audio": partial(random_audio, rng, min_len=512, max_len=1024, sr=16000),
+        "vision_chunk": partial(
+            random_vision_chunk, rng, min_wh=128, max_wh=256, min_frames=1, max_frames=1
+        ),
    }

    for batch_idx in range(num_batches):
@@ -413,11 +439,6 @@ def test_processing_correctness(
            "Qwen-VL tokenizer requires downloading a font file from "
            "servers that often refuse connections in CI"
        )
-    if model_id == "moonshotai/Kimi-K2.5":
-        # FIXME(Isaac): Fix Kimi-K2.5's offline inference about vision chunks.
-        pytest.skip(
-            "Kimi-K2.5's offline inference has issues about vision chunks. Fix later."
-        )

    _test_processing_correctness(
        model_id,