fix(minicpmv): fix audio inference by handling meta device in init_re… (#36751)

Signed-off-by: caitianchi <caitianchi@modelbest.cn>
2026-03-11 18:06:28 +08:00
parent 545d18d81b
commit 4286cc5ec2
1 changed files with 10 additions and 8 deletions
--- a/vllm/model_executor/models/minicpmv.py
+++ b/vllm/model_executor/models/minicpmv.py
@@ -1453,10 +1453,11 @@ class MiniCPMV2_6(MiniCPMVBaseModel, SupportsLoRA):
                quant_config=quant_config,
                prefix=prefix,
            )
-
-        return resampler.to(
-            device=current_platform.device_type, dtype=torch.get_default_dtype()
-        )
+        target_device = current_platform.device_type
+        target_dtype = torch.get_default_dtype()
+        if any(p.is_meta for p in resampler.parameters()):
+            return resampler.to_empty(device=target_device).to(dtype=target_dtype)
+        return resampler.to(device=target_device, dtype=target_dtype)

    def get_vision_hidden_states(self, data: MiniCPMVImagePixelInputs) -> torch.Tensor:
        pixel_values = data["pixel_values"]
@@ -1649,10 +1650,11 @@ class MiniCPMV4_5(MiniCPMVBaseModel, SupportsLoRA):
                quant_config=quant_config,
                prefix=prefix,
            )
-
-        return resampler.to(
-            device=current_platform.device_type, dtype=torch.get_default_dtype()
-        )
+        target_device = current_platform.device_type
+        target_dtype = torch.get_default_dtype()
+        if any(p.is_meta for p in resampler.parameters()):
+            return resampler.to_empty(device=target_device).to(dtype=target_dtype)
+        return resampler.to(device=target_device, dtype=target_dtype)

    def get_vision_hidden_states(self, data: MiniCPMVImagePixelInputs) -> torch.Tensor:
        pixel_values = data["pixel_values"]