diff --git a/tests/models/multimodal/processing/test_mllama4.py b/tests/models/multimodal/processing/test_mllama4.py
index 1b9b00732..e5ba6ae24 100644
--- a/tests/models/multimodal/processing/test_mllama4.py
+++ b/tests/models/multimodal/processing/test_mllama4.py
@@ -48,7 +48,7 @@ def test_profiling(model_id: str, max_model_len: int):
     )  # image start, image, image end
     assert total_num_patches == sum(
-        item.get_num_embeds for item in mm_inputs["mm_placeholders"]["image"]
+        item.get_num_embeds() for item in mm_inputs["mm_placeholders"]["image"]
     )
     assert total_tokens == sum(
         placeholder.length for placeholder in mm_inputs["mm_placeholders"]["image"]
diff --git a/tests/multimodal/test_inputs.py b/tests/multimodal/test_inputs.py
index 7378c1493..d6bdf76a6 100644
--- a/tests/multimodal/test_inputs.py
+++ b/tests/multimodal/test_inputs.py
@@ -19,7 +19,7 @@ from vllm.multimodal.inputs import PlaceholderRange
 def test_placeholder_range_get_num_embeds(is_embed, expected):
     length = len(is_embed) if is_embed is not None else 5
     pr = PlaceholderRange(offset=0, length=length, is_embed=is_embed)
-    assert pr.get_num_embeds == expected
+    assert pr.get_num_embeds() == expected
 
 
 @pytest.mark.parametrize(
diff --git a/tests/v1/core/test_encoder_cache_manager.py b/tests/v1/core/test_encoder_cache_manager.py
index f82c0070c..283b74624 100644
--- a/tests/v1/core/test_encoder_cache_manager.py
+++ b/tests/v1/core/test_encoder_cache_manager.py
@@ -187,7 +187,7 @@ def test_schedule_request_multi_images_respect_compute_limit():
 def test_encoder_cache_with_is_embed_mask():
     class MockRequestWithMask(MockRequest):
         def get_num_encoder_embeds(self, input_id: int) -> int:
-            return self.mm_features[input_id].mm_position.get_num_embeds
+            return self.mm_features[input_id].mm_position.get_num_embeds()
 
     is_embed = torch.zeros(100, dtype=torch.bool)
     is_embed[torch.tensor([5, 15, 25, 35, 45, 55, 65, 75])] = True
@@ -207,7 +207,7 @@ def test_encoder_cache_with_is_embed_mask():
     assert "img1" in manager.cached
 
     old_size = 100
-    new_size = request.mm_features[0].mm_position.get_num_embeds
+    new_size = request.mm_features[0].mm_position.get_num_embeds()
     assert new_size == 8
     savings_ratio = old_size / new_size
     assert savings_ratio == 12.5
@@ -216,7 +216,7 @@ def test_encoder_cache_mask_based_retrieval():
     class MockRequestWithMask(MockRequest):
         def get_num_encoder_embeds(self, input_id: int) -> int:
-            return self.mm_features[input_id].mm_position.get_num_embeds
+            return self.mm_features[input_id].mm_position.get_num_embeds()
 
     is_embed = torch.tensor(
         [False, False, True, True, False, True, True, True, False, False]
     )
@@ -233,7 +233,7 @@ def test_encoder_cache_mask_based_retrieval():
     manager = EncoderCacheManager(cache_size=50)
     manager.allocate(request, 0)
 
-    assert request.mm_features[0].mm_position.get_num_embeds == 5
+    assert request.mm_features[0].mm_position.get_num_embeds() == 5
 
     start_idx = 2
     end_idx = 8
diff --git a/vllm/multimodal/budget.py b/vllm/multimodal/budget.py
index 1380ec1ba..1dddc82b1 100644
--- a/vllm/multimodal/budget.py
+++ b/vllm/multimodal/budget.py
@@ -33,7 +33,7 @@ def get_mm_max_toks_per_item(
     )
 
     return {
-        modality: sum(item.get_num_embeds for item in placeholders)
+        modality: sum(item.get_num_embeds() for item in placeholders)
         for modality, placeholders in mm_inputs["mm_placeholders"].items()
     }
diff --git a/vllm/multimodal/inputs.py b/vllm/multimodal/inputs.py
index a3f8b21c2..221baba6d 100644
--- a/vllm/multimodal/inputs.py
+++ b/vllm/multimodal/inputs.py
@@ -199,7 +199,6 @@ class PlaceholderRange:
     def embeds_cumsum(self) -> torch.Tensor | None:
         return None if self.is_embed is None else self.is_embed.cumsum(dim=0)
 
-    @cached_property
     def get_num_embeds(self) -> int:
         if self.embeds_cumsum is None:
             return self.length
diff --git a/vllm/v1/core/sched/scheduler.py b/vllm/v1/core/sched/scheduler.py
index 9f0643e4f..0b8832c16 100644
--- a/vllm/v1/core/sched/scheduler.py
+++ b/vllm/v1/core/sched/scheduler.py
@@ -1100,7 +1100,7 @@ class Scheduler(SchedulerInterface):
         for i, mm_feature in enumerate(mm_features):
             start_pos = mm_feature.mm_position.offset
             num_encoder_tokens = mm_feature.mm_position.length
-            num_encoder_embeds = mm_feature.mm_position.get_num_embeds
+            num_encoder_embeds = mm_feature.mm_position.get_num_embeds()
             item_identifier = mm_feature.identifier
 
             # The encoder output is needed if the two ranges overlap:
diff --git a/vllm/v1/engine/input_processor.py b/vllm/v1/engine/input_processor.py
index 64dc1831b..98c3a07d1 100644
--- a/vllm/v1/engine/input_processor.py
+++ b/vllm/v1/engine/input_processor.py
@@ -786,7 +786,7 @@ class InputProcessor:
         decoder_mm_positions = prompt_inputs["mm_placeholders"]
         for modality, mm_positions in decoder_mm_positions.items():
             for mm_position in mm_positions:
-                embed_length = mm_position.get_num_embeds
+                embed_length = mm_position.get_num_embeds()
                 if embed_length > self.mm_encoder_cache_size:
                     raise ValueError(
                         f"The {prompt_type} prompt contains a(n) {modality} item "
diff --git a/vllm/v1/request.py b/vllm/v1/request.py
index 8e3684d3c..3b829875f 100644
--- a/vllm/v1/request.py
+++ b/vllm/v1/request.py
@@ -260,7 +260,7 @@ class Request:
 
     def get_num_encoder_embeds(self, input_id: int) -> int:
         assert input_id < len(self.mm_features)
-        return self.mm_features[input_id].mm_position.get_num_embeds
+        return self.mm_features[input_id].mm_position.get_num_embeds()
 
     def record_event(
         self,
diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
index ec36e1591..2ca4866d9 100644
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -2326,7 +2326,7 @@ class GPUModelRunner(
                 # Prefer pos_info.get_num_embeds to count precise MM embedding tokens.
                 num_tokens = self.model.get_num_mm_encoder_tokens(  # type: ignore[attr-defined]
-                    pos_info.get_num_embeds
+                    pos_info.get_num_embeds()
                 )
                 prompt_lora_mapping.append(lora_id)
                 token_lora_mapping.extend([lora_id] * num_tokens)