[VLM] Merged multimodal processor for Qwen2-Audio (#11303)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2024-12-19 14:14:17 +08:00
committed by GitHub
parent c6b0a7d3ba
commit 6142ef0ada
11 changed files with 414 additions and 358 deletions

View File

@@ -133,8 +133,8 @@ class LlavaMultiModalProcessor(BaseMultiModalProcessor):
hf_processor.__is_patched__ = True # type: ignore
def _get_hf_processor(self) -> Union[LlavaProcessor, PixtralProcessor]:
hf_processor = self.ctx.get_hf_processor()
assert isinstance(hf_processor, (LlavaProcessor, PixtralProcessor))
hf_processor = self.ctx.get_hf_processor(
(LlavaProcessor, PixtralProcessor))
if isinstance(hf_processor, PixtralProcessor):
self._patch_pixtral_processor(hf_processor)