[Model] Support Pixtral models in the HF Transformers format (#9036)

2024-10-18 15:29:56 -04:00
parent 67a7e5ef38
commit 3921a2f29e
7 changed files with 503 additions and 12 deletions
--- a/examples/offline_inference_vision_language.py
+++ b/examples/offline_inference_vision_language.py
@@ -277,6 +277,22 @@ def run_qwen2_vl(question: str, modality: str):
    return llm, prompt, stop_token_ids


+# Pixtral HF-format
+def run_pixtral_hf(question: str, modality: str):
+    assert modality == "image"
+
+    model_name = "mistral-community/pixtral-12b"
+
+    llm = LLM(
+        model=model_name,
+        max_model_len=8192,
+    )
+
+    prompt = f"<s>[INST]{question}\n[IMG][/INST]"
+    stop_token_ids = None
+    return llm, prompt, stop_token_ids
+
+
 # LLama 3.2
 def run_mllama(question: str, modality: str):
    assert modality == "image"
@@ -347,6 +363,7 @@ model_example_map = {
    "NVLM_D": run_nvlm_d,
    "qwen_vl": run_qwen_vl,
    "qwen2_vl": run_qwen2_vl,
+    "pixtral_hf": run_pixtral_hf,
    "mllama": run_mllama,
    "molmo": run_molmo,
    "glm4v": run_glm4v,