[Refactor] Define MM data parser in processing info instead of processor itself (#33260)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -274,16 +274,6 @@ class KeyeVL1_5Projector(nn.Module):
|
||||
return hidden_states.view(*dims, -1)
|
||||
|
||||
|
||||
class KeyeVL1_5ProcessingInfo(KeyeProcessingInfo):
|
||||
def get_max_frame_per_video(self) -> int:
|
||||
return 2048
|
||||
|
||||
def get_supported_mm_limits(
|
||||
self,
|
||||
) -> Mapping[str, int | None]:
|
||||
return {"image": None, "video": 1}
|
||||
|
||||
|
||||
def _keye_field_config(
|
||||
hf_inputs: Mapping[str, torch.Tensor],
|
||||
):
|
||||
@@ -365,10 +355,22 @@ class KeyeVL1_5MultiModalDataParser(MultiModalDataParser):
|
||||
return super()._parse_video_data(data)
|
||||
|
||||
|
||||
class KeyeVL1_5MultiModalProcessor(BaseMultiModalProcessor[KeyeVL1_5ProcessingInfo]):
|
||||
def _get_data_parser(self) -> MultiModalDataParser:
|
||||
return KeyeVL1_5MultiModalDataParser()
|
||||
class KeyeVL1_5ProcessingInfo(KeyeProcessingInfo):
|
||||
def get_data_parser(self):
|
||||
return KeyeVL1_5MultiModalDataParser(
|
||||
expected_hidden_size=self._get_expected_hidden_size(),
|
||||
)
|
||||
|
||||
def get_max_frame_per_video(self) -> int:
|
||||
return 2048
|
||||
|
||||
def get_supported_mm_limits(
|
||||
self,
|
||||
) -> Mapping[str, int | None]:
|
||||
return {"image": None, "video": 1}
|
||||
|
||||
|
||||
class KeyeVL1_5MultiModalProcessor(BaseMultiModalProcessor[KeyeVL1_5ProcessingInfo]):
|
||||
def _get_prompt_updates(
|
||||
self,
|
||||
mm_items: MultiModalDataItems,
|
||||
|
||||
Reference in New Issue
Block a user