[Feature] Add vision language model support. (#3042)

2024-03-25 14:16:30 -07:00
parent f408d05c52
commit 64172a976c
28 changed files with 936 additions and 94 deletions
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@@ -40,3 +40,17 @@ def get_config(model: str,
                                              revision=revision,
                                              code_revision=code_revision)
    return config
+
+
+def get_hf_text_config(config: PretrainedConfig):
+    """Return the text (LLM) sub-config of a multimodal config.
+
+    A config without `text_config` is returned unchanged. Raises ValueError
+    when `text_config` lacks `num_attention_heads`, which the code assumes.
+    """
+    if not hasattr(config, "text_config"):
+        return config
+    # Raise explicitly: an `assert` would be stripped under `python -O`.
+    if not hasattr(config.text_config, "num_attention_heads"):
+        raise ValueError("text_config must define num_attention_heads")
+    return config.text_config