diff --git a/vllm/model_executor/models/llava_next.py b/vllm/model_executor/models/llava_next.py
index 10261aa42..2fb79f57a 100644
--- a/vllm/model_executor/models/llava_next.py
+++ b/vllm/model_executor/models/llava_next.py
@@ -540,7 +540,7 @@ class LlavaNextForConditionalGeneration(nn.Module, SupportsMultiModal,
         Unlike in LLaVA-1.5, the number of image tokens inputted to the language
         model depends on the original size of the input image. Including the
         original image token in the input, the required number of image tokens
-        is given by {func}`get_llava_next_image_feature_size`.
+        is given by [get_llava_next_image_feature_size][].
 
         This way, the `positions` and `attn_metadata` are consistent
         with the `input_ids`.
diff --git a/vllm/multimodal/processing.py b/vllm/multimodal/processing.py
index 320a26f37..f56110d94 100644
--- a/vllm/multimodal/processing.py
+++ b/vllm/multimodal/processing.py
@@ -387,7 +387,7 @@ _M = TypeVar("_M", bound=Union[_HasModalityAttr, _HasModalityProp])
 
 
 def full_groupby_modality(values: Iterable[_M]) -> ItemsView[str, list[_M]]:
-    """Convenience function to apply {func}`full_groupby` based on modality."""
+    """Convenience function to apply [full_groupby][] based on modality."""
     return full_groupby(values, key=lambda x: x.modality)
 
 
diff --git a/vllm/platforms/interface.py b/vllm/platforms/interface.py
index 20284b4e1..646faa944 100644
--- a/vllm/platforms/interface.py
+++ b/vllm/platforms/interface.py
@@ -157,7 +157,7 @@ class Platform:
         return self._enum == PlatformEnum.OOT
 
     def is_cuda_alike(self) -> bool:
-        """Stateless version of {func}`torch.cuda.is_available`."""
+        """Stateless version of [torch.cuda.is_available][]."""
        return self._enum in (PlatformEnum.CUDA, PlatformEnum.ROCM)
 
     def is_sleep_mode_available(self) -> bool:
@@ -194,7 +194,7 @@ class Platform:
         cls,
         device_id: int = 0,
     ) -> Optional[DeviceCapability]:
-        """Stateless version of {func}`torch.cuda.get_device_capability`."""
+        """Stateless version of [torch.cuda.get_device_capability][]."""
         return None
 
     @classmethod
diff --git a/vllm/sequence.py b/vllm/sequence.py
index f3dfd32d9..e9212a825 100644
--- a/vllm/sequence.py
+++ b/vllm/sequence.py
@@ -27,7 +27,7 @@ VLLM_INVALID_TOKEN_ID = -1
 
 
 def array_full(token_id: int, count: int):
-    """{class}`array` equivalent of {func}`numpy.full`."""
+    """{class}`array` equivalent of [numpy.full][]."""
     return array(VLLM_TOKEN_ID_ARRAY_TYPE, [token_id]) * count
 
 
diff --git a/vllm/v1/worker/utils.py b/vllm/v1/worker/utils.py
index 267754036..28503a0a9 100644
--- a/vllm/v1/worker/utils.py
+++ b/vllm/v1/worker/utils.py
@@ -66,7 +66,7 @@ def gather_mm_placeholders(
     """
     Reconstructs the embeddings from the placeholder tokens.
 
-    This is the operation of {func}`scatter_mm_placeholders`.
+    This is the inverse operation of [scatter_mm_placeholders][].
     """
     if is_embed is None:
         return placeholders
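
For reference, the mechanical rewrite applied across these hunks can be reproduced with a small script. The helper below is a hypothetical sketch, not part of this diff: it converts the simple Sphinx-style role form, e.g. `{func}`numpy.full``, into the MkDocs autorefs form `[numpy.full][]`. It is deliberately restricted to `{func}` roles, matching this patch (note the `{class}`array`` role in `vllm/sequence.py` is left untouched), and it does not attempt to handle roles with an explicit title such as `{func}`title <target>``.

```python
# Hypothetical conversion helper (not part of this diff): rewrite simple
# Sphinx-style {func}`target` roles into the MkDocs autorefs form [target][].
# Titled roles like {func}`title <target>` are not matched and pass through.
import re

# Restricted to {func} to mirror this patch; widen the pattern to
# (?:func|class|meth|attr) to convert other roles in a later pass.
_ROLE_RE = re.compile(r"\{func\}`([^`<>]+)`")


def convert_roles(text: str) -> str:
    """Replace every {func}`target` in *text* with [target][]."""
    return _ROLE_RE.sub(r"[\1][]", text)


if __name__ == "__main__":
    line = "Stateless version of {func}`torch.cuda.get_device_capability`."
    print(convert_roles(line))
    # Stateless version of [torch.cuda.get_device_capability][].
```

Run over each file's text (e.g. `path.write_text(convert_roles(path.read_text()))`), this produces exactly the `+` lines above; the `[name][]` form is then resolved to API links by mkdocs-autorefs/mkdocstrings at build time.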