[Model] VLM2Vec, the first multimodal embedding model in vLLM (#9303)

This commit is contained in:
Cyrus Leung
2024-10-16 14:31:00 +08:00
committed by GitHub
parent 7e7eae338d
commit 7abba39ee6
16 changed files with 465 additions and 261 deletions

View File

@@ -237,7 +237,16 @@ class ModelConfig:
def _verify_embedding_mode(self) -> None:
    """Decide whether this model runs in embedding mode.

    Reads the architecture names from ``self.hf_config`` and stores the
    resulting flag on ``self.embedding_mode``.

    ``Phi3VForCausalLM`` is special-cased: the flag is taken from the model
    name/path (``self.model``) rather than from the registry, and is true
    only when the name contains ``"/VLM2Vec"``. Every other architecture
    defers to ``ModelRegistry.is_embedding_model``.
    """
    # Treat a missing *or* ``None`` ``architectures`` attribute as "no
    # architectures" so the membership test below cannot raise TypeError.
    architectures = getattr(self.hf_config, "architectures", None) or []

    # TODO: Allow the same model architecture to be specified as either
    # generation or embedding model
    if "Phi3VForCausalLM" in architectures:
        # Match both remote and local names
        self.embedding_mode = "/VLM2Vec" in self.model
    else:
        # Single registry lookup; the flag is assigned exactly once.
        self.embedding_mode = ModelRegistry.is_embedding_model(
            architectures)
def _parse_quant_hf_config(self):
quant_cfg = getattr(self.hf_config, "quantization_config", None)