diff --git a/mkdocs.yaml b/mkdocs.yaml
index d5d6852f3..ecc0ab692 100644
--- a/mkdocs.yaml
+++ b/mkdocs.yaml
@@ -63,8 +63,9 @@ plugins:
   - git-revision-date-localized:
       # exclude autogenerated files
       exclude:
-        - argparse/*
+        - api/*
         - examples/*
+        - generated/*
   - minify:
       minify_html: true
       minify_js: true
@@ -92,7 +93,6 @@ plugins:
               - "!.*_pb2_grpc"  # Exclude auto-generated gRPC stubs
             summary:
               modules: true
-            show_if_no_docstring: true
             show_signature_annotations: true
             separate_signature: true
             show_overloads: true
diff --git a/vllm/config/model.py b/vllm/config/model.py
index 749af0d5d..5fd7d2d73 100644
--- a/vllm/config/model.py
+++ b/vllm/config/model.py
@@ -1557,6 +1557,7 @@ class ModelConfig:
 
     @property
     def attn_type(self) -> AttnTypeStr:
+        """Determine the attention type based on model configuration."""
         if self.pooler_config is not None:
             seq_pooling_type = self._model_info.default_seq_pooling_type
             if seq_pooling_type == "CLS":
diff --git a/vllm/engine/async_llm_engine.py b/vllm/engine/async_llm_engine.py
index ede027759..fc1cea023 100644
--- a/vllm/engine/async_llm_engine.py
+++ b/vllm/engine/async_llm_engine.py
@@ -4,3 +4,4 @@
 from vllm.v1.engine.async_llm import AsyncLLM
 
 AsyncLLMEngine = AsyncLLM  # type: ignore
+"""The `AsyncLLMEngine` class is an alias of [vllm.v1.engine.async_llm.AsyncLLM][]."""
diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py
index a0fe38eb3..419139c4b 100644
--- a/vllm/engine/llm_engine.py
+++ b/vllm/engine/llm_engine.py
@@ -4,3 +4,4 @@
 from vllm.v1.engine.llm_engine import LLMEngine as V1LLMEngine
 
 LLMEngine = V1LLMEngine  # type: ignore
+"""The `LLMEngine` class is an alias of [vllm.v1.engine.llm_engine.LLMEngine][]."""
diff --git a/vllm/inputs/data.py b/vllm/inputs/data.py
index 7848c2c03..157ab337e 100644
--- a/vllm/inputs/data.py
+++ b/vllm/inputs/data.py
@@ -298,6 +298,7 @@ which can be passed to
 
 
 SingletonInputs: TypeAlias = DecoderOnlyInputs | MultiModalEncDecInputs
+"""The inputs for a single encoder/decoder prompt."""
 
 
 @dataclass
diff --git a/vllm/model_executor/layers/fused_moe/cpu_fused_moe.py b/vllm/model_executor/layers/fused_moe/cpu_fused_moe.py
index ee4798d84..e929074d5 100644
--- a/vllm/model_executor/layers/fused_moe/cpu_fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/cpu_fused_moe.py
@@ -206,6 +206,8 @@ class SGLFusedMOE:
 
 
 class CPUFusedMOE:
+    """CPU-based fused MoE implementation."""
+
     def __init__(self, layer: torch.nn.Module) -> None:
         use_grouped_gemm, isa = self.check_grouped_gemm(layer)
         self.isa = isa
diff --git a/vllm/model_executor/layers/fused_moe/cutlass_moe.py b/vllm/model_executor/layers/fused_moe/cutlass_moe.py
index ac5a86067..77d439d32 100644
--- a/vllm/model_executor/layers/fused_moe/cutlass_moe.py
+++ b/vllm/model_executor/layers/fused_moe/cutlass_moe.py
@@ -376,6 +376,8 @@ class CutlassExpertsFp8Base(mk.FusedMoEPermuteExpertsUnpermute):
 
 
 class CutlassExpertsFp8(CutlassExpertsFp8Base):
+    """CUTLASS FP8 fused MoE expert implementation."""
+
     @staticmethod
     def activation_format() -> mk.FusedMoEActivationFormat:
         return mk.FusedMoEActivationFormat.Standard
@@ -423,6 +425,8 @@ class CutlassExpertsFp8(CutlassExpertsFp8Base):
 
 
 class CutlassBatchedExpertsFp8(CutlassExpertsFp8Base):
+    """Batched CUTLASS FP8 fused MoE expert implementation."""
+
     @staticmethod
     def _supports_parallel_config(moe_parallel_config: FusedMoEParallelConfig) -> bool:
         # BATCHED activation format works with EP because
@@ -651,6 +655,8 @@ def run_cutlass_moe_fp4(
 
 
 class CutlassExpertsFp4(mk.FusedMoEPermuteExpertsUnpermute):
+    """CUTLASS FP4 fused MoE expert implementation."""
+
     @property
     def expects_unquantized_inputs(self) -> bool:
         return True
diff --git a/vllm/model_executor/layers/fused_moe/deep_gemm_moe.py b/vllm/model_executor/layers/fused_moe/deep_gemm_moe.py
index 00d55bfb7..59dde3ca9 100644
--- a/vllm/model_executor/layers/fused_moe/deep_gemm_moe.py
+++ b/vllm/model_executor/layers/fused_moe/deep_gemm_moe.py
@@ -113,6 +113,8 @@ def _valid_deep_gemm(
 
 
 class DeepGemmExperts(mk.FusedMoEPermuteExpertsUnpermute):
+    """DeepGEMM-based fused MoE expert implementation."""
+
     def __init__(self, moe_config: FusedMoEConfig, quant_config: FusedMoEQuantConfig):
         super().__init__(moe_config=moe_config, quant_config=quant_config)
         assert quant_config.block_shape == get_mk_alignment_for_contiguous_layout()
diff --git a/vllm/model_executor/layers/fused_moe/fused_marlin_moe.py b/vllm/model_executor/layers/fused_moe/fused_marlin_moe.py
index 5d382cfc9..3d3a21f81 100644
--- a/vllm/model_executor/layers/fused_moe/fused_marlin_moe.py
+++ b/vllm/model_executor/layers/fused_moe/fused_marlin_moe.py
@@ -637,6 +637,8 @@ class MarlinExpertsBase(mk.FusedMoEPermuteExpertsUnpermute):
 
 
 class MarlinExperts(MarlinExpertsBase):
+    """Marlin-based fused MoE expert implementation."""
+
     def supports_expert_map(self) -> bool:
         return True
 
@@ -738,6 +740,8 @@ class MarlinExperts(MarlinExpertsBase):
 
 
 class BatchedMarlinExperts(MarlinExpertsBase):
+    """Batched Marlin-based fused MoE expert implementation."""
+
     def __init__(
         self,
         moe_config: FusedMoEConfig,
diff --git a/vllm/model_executor/layers/fused_moe/fused_moe.py b/vllm/model_executor/layers/fused_moe/fused_moe.py
index 6ca3213fb..352288e17 100644
--- a/vllm/model_executor/layers/fused_moe/fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/fused_moe.py
@@ -1527,6 +1527,7 @@ def fused_experts(
     expert_map: torch.Tensor | None = None,
     quant_config: FusedMoEQuantConfig | None = None,
 ) -> torch.Tensor:
+    """Run fused MoE expert computation using Triton kernels."""
     if quant_config is None:
         quant_config = FUSED_MOE_UNQUANTIZED_CONFIG
 
@@ -1879,6 +1880,8 @@ def fused_experts_impl(
 
 
 class TritonExperts(mk.FusedMoEPermuteExpertsUnpermute):
+    """Triton-based fused MoE expert implementation."""
+
     def __init__(
         self,
         moe_config: FusedMoEConfig,
diff --git a/vllm/model_executor/layers/fused_moe/gpt_oss_triton_kernels_moe.py b/vllm/model_executor/layers/fused_moe/gpt_oss_triton_kernels_moe.py
index eafdf97a9..5aaf2a8c3 100644
--- a/vllm/model_executor/layers/fused_moe/gpt_oss_triton_kernels_moe.py
+++ b/vllm/model_executor/layers/fused_moe/gpt_oss_triton_kernels_moe.py
@@ -221,6 +221,7 @@ def triton_kernel_fused_experts(
     intermediate_cache: torch.Tensor | None = None,
     a1q_scale: torch.Tensor | None = None,
 ) -> torch.Tensor:
+    """Run fused MoE expert computation using OAI Triton kernels."""
     if quant_config is None:
         quant_config = FUSED_MOE_UNQUANTIZED_CONFIG
 
@@ -444,6 +445,8 @@ class BaseOAITritonExperts(mk.FusedMoEPermuteExpertsUnpermute):
 
 
 class OAITritonExperts(BaseOAITritonExperts):
+    """OAI Triton-based fused MoE expert implementation."""
+
     @staticmethod
     def activation_format() -> mk.FusedMoEActivationFormat:
         return mk.FusedMoEActivationFormat.Standard
diff --git a/vllm/model_executor/layers/fused_moe/pplx_prepare_finalize.py b/vllm/model_executor/layers/fused_moe/pplx_prepare_finalize.py
index 78b941498..289ac0d14 100644
--- a/vllm/model_executor/layers/fused_moe/pplx_prepare_finalize.py
+++ b/vllm/model_executor/layers/fused_moe/pplx_prepare_finalize.py
@@ -63,6 +63,8 @@ def pplx_hidden_dim_scale_bytes(
 
 
 class PplxPrepareAndFinalize(mk.FusedMoEPrepareAndFinalize):
+    """PPLX-based prepare and finalize for expert parallelism."""
+
     def __init__(
         self,
         a2a: pplx.AllToAll,
diff --git a/vllm/model_executor/layers/fused_moe/prepare_finalize.py b/vllm/model_executor/layers/fused_moe/prepare_finalize.py
index d10476702..7b8dd3b77 100644
--- a/vllm/model_executor/layers/fused_moe/prepare_finalize.py
+++ b/vllm/model_executor/layers/fused_moe/prepare_finalize.py
@@ -131,6 +131,8 @@ class MoEPrepareAndFinalizeNaiveEP(mk.FusedMoEPrepareAndFinalize):
 
 
 class MoEPrepareAndFinalizeNoEP(mk.FusedMoEPrepareAndFinalize):
+    """MoE prepare and finalize without expert parallelism."""
+
     @property
     def activation_format(self) -> mk.FusedMoEActivationFormat:
         return mk.FusedMoEActivationFormat.Standard
diff --git a/vllm/model_executor/layers/fused_moe/rocm_aiter_fused_moe.py b/vllm/model_executor/layers/fused_moe/rocm_aiter_fused_moe.py
index 33150da6f..535abc420 100644
--- a/vllm/model_executor/layers/fused_moe/rocm_aiter_fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/rocm_aiter_fused_moe.py
@@ -192,6 +192,7 @@ def rocm_aiter_fused_experts(
     num_local_tokens: torch.Tensor | None = None,
     output_dtype: torch.dtype | None = None,
 ) -> torch.Tensor:
+    """Run fused MoE expert computation using ROCm AITER."""
     if quant_config is None:
         quant_config = FUSED_MOE_UNQUANTIZED_CONFIG
 
diff --git a/vllm/model_executor/layers/fused_moe/trtllm_moe.py b/vllm/model_executor/layers/fused_moe/trtllm_moe.py
index aa7185040..074b8154a 100644
--- a/vllm/model_executor/layers/fused_moe/trtllm_moe.py
+++ b/vllm/model_executor/layers/fused_moe/trtllm_moe.py
@@ -18,6 +18,8 @@ from vllm.model_executor.layers.quantization.utils.quant_utils import (
 
 
 class TrtLlmGenExperts(mk.FusedMoEPermuteExpertsUnpermute):
+    """TensorRT-LLM-based fused MoE expert implementation."""
+
     def __init__(
         self,
         moe_config: FusedMoEConfig,
diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py
index 023cf3f67..690ff0454 100644
--- a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py
@@ -680,6 +680,8 @@ class CompressedTensorsW4A4Nvfp4MoEMethod(CompressedTensorsMoEMethod):
 
 
 class CompressedTensorsW8A8Fp8MoEMethod(CompressedTensorsMoEMethod):
+    """W8A8 FP8 MoE quantization using compressed tensors."""
+
     def __init__(
         self,
         weight_quant: QuantizationArgs,
diff --git a/vllm/model_executor/layers/quantization/mxfp4.py b/vllm/model_executor/layers/quantization/mxfp4.py
index 75501076a..5cd6d5d79 100644
--- a/vllm/model_executor/layers/quantization/mxfp4.py
+++ b/vllm/model_executor/layers/quantization/mxfp4.py
@@ -235,6 +235,8 @@ class Mxfp4Config(QuantizationConfig):
 
 
 class Mxfp4MoEMethod(FusedMoEMethodBase):
+    """MXFP4 MoE quantization method."""
+
     def __init__(self, moe: FusedMoEConfig):
         super().__init__(moe)
         self.weight_dtype = "mxfp4"
diff --git a/vllm/model_executor/models/blip2.py b/vllm/model_executor/models/blip2.py
index 0441996f6..f812eb849 100644
--- a/vllm/model_executor/models/blip2.py
+++ b/vllm/model_executor/models/blip2.py
@@ -73,6 +73,7 @@ class Blip2ImageEmbeddingInputs(TensorSchema):
 
 
 Blip2ImageInputs: TypeAlias = Blip2ImagePixelInputs | Blip2ImageEmbeddingInputs
+"""Alias for supported BLIP-2 image input types."""
 
 
 class Blip2QFormerMultiHeadAttention(nn.Module):
diff --git a/vllm/model_executor/models/llava.py b/vllm/model_executor/models/llava.py
index 2f9aaa3f3..c35728183 100644
--- a/vllm/model_executor/models/llava.py
+++ b/vllm/model_executor/models/llava.py
@@ -121,6 +121,7 @@ class LlavaImageEmbeddingInputs(TensorSchema):
 LlavaImageInputs: TypeAlias = (
     LlavaImagePixelInputs | PixtralHFImagePixelInputs | LlavaImageEmbeddingInputs
 )
+"""Alias for supported LLaVA image input types."""
 
 
 class LlavaMultiModalProjector(nn.Module):
diff --git a/vllm/model_executor/models/llava_next.py b/vllm/model_executor/models/llava_next.py
index 9f83c7910..4ea58ce71 100644
--- a/vllm/model_executor/models/llava_next.py
+++ b/vllm/model_executor/models/llava_next.py
@@ -78,6 +78,7 @@ class LlavaNextImageEmbeddingInputs(TensorSchema):
 LlavaNextImageInputs: TypeAlias = (
     LlavaNextImagePixelInputs | LlavaNextImageEmbeddingInputs
 )
+"""Alias for supported LLaVA-NeXT image input types."""
 
 
 class LlavaNextLikeConfig(LlavaLikeConfig, Protocol):
@@ -106,6 +107,7 @@ class LlavaNextProcessingInfo(BaseLlavaProcessingInfo):
         image_width: int,
         image_height: int,
     ) -> int:
+        """Get the number of image tokens for the given image dimensions."""
         hf_config = self.get_hf_config()
         vision_encoder_info = self.get_vision_encoder_info()
 
diff --git a/vllm/multimodal/processing/processor.py b/vllm/multimodal/processing/processor.py
index 5f98cce3d..e1a164d4e 100644
--- a/vllm/multimodal/processing/processor.py
+++ b/vllm/multimodal/processing/processor.py
@@ -1110,6 +1110,7 @@ class BaseMultiModalProcessor(ABC, Generic[_I]):
         self,
         mm_items: MultiModalDataItems,
     ) -> tuple[Mapping[str, object], Mapping[str, object]]:
+        """Extract processor and passthrough data from multi-modal items."""
         processor_data = dict[str, object]()
         passthrough_data = dict[str, object]()
 
@@ -1616,6 +1617,7 @@ class BaseMultiModalProcessor(ABC, Generic[_I]):
         token_ids: list[int],
         mm_prompt_updates: MultiModalPromptUpdates,
     ) -> tuple[list[int], Mapping[str, list[PlaceholderFeaturesInfo]]]:
+        """Apply multi-modal prompt updates to token IDs."""
         tokenizer = self.info.get_tokenizer()
 
         new_token_ids, match_result = self._apply_token_matches(
diff --git a/vllm/platforms/interface.py b/vllm/platforms/interface.py
index 45dde6e47..27f5ea517 100644
--- a/vllm/platforms/interface.py
+++ b/vllm/platforms/interface.py
@@ -35,6 +35,8 @@ def in_wsl() -> bool:
 
 
 class PlatformEnum(enum.Enum):
+    """Enumeration of supported hardware platforms."""
+
     CUDA = enum.auto()
     ROCM = enum.auto()
     TPU = enum.auto()
diff --git a/vllm/plugins/__init__.py b/vllm/plugins/__init__.py
index 4c59d5364..89fadad7a 100644
--- a/vllm/plugins/__init__.py
+++ b/vllm/plugins/__init__.py
@@ -26,6 +26,7 @@ plugins_loaded = False
 
 
 def load_plugins_by_group(group: str) -> dict[str, Callable[[], Any]]:
+    """Load plugins registered under the given entry point group."""
     from importlib.metadata import entry_points
 
     allowed_plugins = envs.VLLM_PLUGINS
diff --git a/vllm/plugins/io_processors/interface.py b/vllm/plugins/io_processors/interface.py
index a978b1e74..fa71b4ca0 100644
--- a/vllm/plugins/io_processors/interface.py
+++ b/vllm/plugins/io_processors/interface.py
@@ -16,6 +16,8 @@ IOProcessorOutput = TypeVar("IOProcessorOutput")
 
 
 class IOProcessor(ABC, Generic[IOProcessorInput, IOProcessorOutput]):
+    """Abstract interface for pre/post-processing of engine I/O."""
+
     def __init__(self, vllm_config: VllmConfig):
         super().__init__()
 
diff --git a/vllm/v1/engine/async_llm.py b/vllm/v1/engine/async_llm.py
index bb4fffb69..072d2a164 100644
--- a/vllm/v1/engine/async_llm.py
+++ b/vllm/v1/engine/async_llm.py
@@ -69,6 +69,8 @@ class InputStreamError(Exception):
 
 
 class AsyncLLM(EngineClient):
+    """An asynchronous wrapper for the vLLM engine."""
+
     def __init__(
         self,
         vllm_config: VllmConfig,