[Model] Update multi-modal processor to support Mantis(LLaVA) model (#10711)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
2024-12-08 01:10:05 +08:00
parent 1c768fe537
commit 39e227c7ae
14 changed files with 175 additions and 78 deletions
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -362,6 +362,7 @@ steps:
  - tests/models/embedding/vision_language
  - tests/models/encoder_decoder/vision_language
  commands:
+    - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
    - pytest -v -s models/decoder_only/audio_language -m 'core_model or quant_model'
    - pytest -v -s --ignore models/decoder_only/vision_language/test_phi3v.py models/decoder_only/vision_language -m 'core_model or quant_model'
    - pytest -v -s models/embedding/vision_language -m core_model
@@ -377,6 +378,7 @@ steps:
  - tests/models/embedding/vision_language
  - tests/models/encoder_decoder/vision_language
  commands:
+    - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
    - pytest -v -s models/decoder_only/audio_language -m 'not core_model and not quant_model'
    # HACK - run phi3v tests separately to sidestep this transformers bug
    # https://github.com/huggingface/transformers/issues/34307