[Refactor] Define MM data parser in processing info instead of processor itself (#33260)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2026-01-29 13:55:17 +08:00
committed by GitHub
parent 07ea184f00
commit 51550179fc
34 changed files with 399 additions and 347 deletions

View File

@@ -274,16 +274,6 @@ class KeyeVL1_5Projector(nn.Module):
return hidden_states.view(*dims, -1)
class KeyeVL1_5ProcessingInfo(KeyeProcessingInfo):
    """Processing info for Keye-VL 1.5, built on ``KeyeProcessingInfo``."""

    def get_max_frame_per_video(self) -> int:
        """Return the fixed frame count ceiling for one video (2048)."""
        max_frames = 2048
        return max_frames

    def get_supported_mm_limits(self) -> Mapping[str, int | None]:
        """Return the per-modality limits as a mapping.

        ``"image"`` maps to ``None`` and ``"video"`` maps to ``1``.
        NOTE(review): ``None`` presumably means "no fixed limit" -- confirm
        against the base class contract.
        """
        return dict(image=None, video=1)
def _keye_field_config(
hf_inputs: Mapping[str, torch.Tensor],
):
@@ -365,10 +355,22 @@ class KeyeVL1_5MultiModalDataParser(MultiModalDataParser):
return super()._parse_video_data(data)
class KeyeVL1_5MultiModalProcessor(BaseMultiModalProcessor[KeyeVL1_5ProcessingInfo]):
def _get_data_parser(self) -> MultiModalDataParser:
    """Create the Keye-VL 1.5 data parser with its default arguments."""
    parser = KeyeVL1_5MultiModalDataParser()
    return parser
class KeyeVL1_5ProcessingInfo(KeyeProcessingInfo):
    """Processing info for Keye-VL 1.5, built on ``KeyeProcessingInfo``.

    Besides the modality limits, this class also constructs the multimodal
    data parser used for Keye-VL 1.5 inputs.
    """

    def get_data_parser(self):
        """Build the Keye-VL 1.5 multimodal data parser.

        The parser is given the value of ``self._get_expected_hidden_size()``
        as its ``expected_hidden_size`` argument.
        """
        hidden_size = self._get_expected_hidden_size()
        return KeyeVL1_5MultiModalDataParser(expected_hidden_size=hidden_size)

    def get_max_frame_per_video(self) -> int:
        """Return the fixed frame count ceiling for one video (2048)."""
        max_frames = 2048
        return max_frames

    def get_supported_mm_limits(self) -> Mapping[str, int | None]:
        """Return the per-modality limits as a mapping.

        ``"image"`` maps to ``None`` and ``"video"`` maps to ``1``.
        NOTE(review): ``None`` presumably means "no fixed limit" -- confirm
        against the base class contract.
        """
        return dict(image=None, video=1)
class KeyeVL1_5MultiModalProcessor(BaseMultiModalProcessor[KeyeVL1_5ProcessingInfo]):
def _get_prompt_updates(
self,
mm_items: MultiModalDataItems,