[Model] Support DeepSeek-OCR-2 (#33165)

Signed-off-by: liuli <ll407707@alibaba-inc.com> Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn> Co-authored-by: liuli <ll407707@alibaba-inc.com> Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn>
2026-02-02 14:24:10 +08:00
parent beb8899482
commit 808dd87b30
9 changed files with 1099 additions and 1 deletions
--- a/examples/offline_inference/vision_language.py
+++ b/examples/offline_inference/vision_language.py
@@ -270,6 +270,49 @@ def run_deepseek_ocr(questions: list[str], modality: str) -> ModelRequestData:
    )


+def run_deepseek_ocr2(questions: list[str], modality: str) -> ModelRequestData:
+    from vllm.model_executor.models.deepseek_ocr import NGramPerReqLogitsProcessor
+
+    assert modality == "image"
+
+    model_name = "deepseek-ai/DeepSeek-OCR-2"
+
+    engine_args = EngineArgs(
+        model=model_name,
+        limit_mm_per_prompt={modality: 1},
+        logits_processors=[NGramPerReqLogitsProcessor],
+    )
+
+    # deepseek-ocr use plain prompt template
+    prompts = [f"<image>\n{question}" for question in questions]
+
+    # The following sampling params config is taken from
+    # the official Deepseek-OCR inference example.
+    # (IMPORTANT) Use the custom logits processor and avoid skipping
+    # special tokens for this model for the optimal OCR performance.
+    sampling_params = [
+        SamplingParams(
+            temperature=0.0,
+            max_tokens=8192,
+            # ngram logit processor args
+            extra_args=dict(
+                ngram_size=30,
+                window_size=90,
+                # whitelist: <td>, </td>
+                whitelist_token_ids={128821, 128822},
+            ),
+            skip_special_tokens=False,
+        )
+        for _ in questions
+    ]
+
+    return ModelRequestData(
+        engine_args=engine_args,
+        prompts=prompts,
+        sampling_params=sampling_params,
+    )
+
+
 # Dots-OCR
 def run_dots_ocr(questions: list[str], modality: str) -> ModelRequestData:
    assert modality == "image"
@@ -2045,6 +2088,7 @@ model_example_map = {
    "command_a_vision": run_command_a_vision,
    "deepseek_vl_v2": run_deepseek_vl2,
    "deepseek_ocr": run_deepseek_ocr,
+    "deepseek_ocr2": run_deepseek_ocr2,
    "dots_ocr": run_dots_ocr,
    "eagle2_5": run_eagle2_5,
    "ernie45_vl": run_ernie45_vl,