[Bugfix] Fix glm4.1v video inference issue (#22067)
Signed-off-by: Isotr0py <2037008807@qq.com>
This commit is contained in:
@@ -937,7 +937,7 @@ class Glm4vProcessingInfo(BaseProcessingInfo):
|
||||
total_frames: int) -> list[int]:
|
||||
video_processor = self.get_video_processor()
|
||||
|
||||
- video_fps = metadata.get("fps", 2.0)
|
||||
+ video_fps = metadata.get("fps", video_processor.fps)
|
||||
meta_frames = metadata.get("total_num_frames", total_frames)
|
||||
max_frame_idx = meta_frames - 1
|
||||
duration = metadata.get("duration",
|
||||
@@ -1120,11 +1120,7 @@ class Glm4vMultiModalProcessor(BaseMultiModalProcessor[Glm4vProcessingInfo]):
|
||||
video_placeholder,
|
||||
)
|
||||
|
||||
- grid_t = len(video_outputs["video_grid_thw"])
|
||||
- _, grid_h, grid_w = video_outputs["video_grid_thw"][0]
|
||||
- grid_thw = torch.tensor([[grid_t, grid_h, grid_w]])
|
||||
|
||||
- video_grid_thw_lst.append(grid_thw)
|
||||
+ video_grid_thw_lst.append(video_outputs["video_grid_thw"])
|
||||
pixel_values_videos_lst.append(
|
||||
video_outputs["pixel_values_videos"])
|
||||
video_outputs = dict(
|
||||
|
||||
Reference in New Issue
Block a user