diff --git a/vllm/config/cache.py b/vllm/config/cache.py
index 50fe82eb1..dcc93d987 100644
--- a/vllm/config/cache.py
+++ b/vllm/config/cache.py
@@ -66,22 +66,21 @@ class CacheConfig:
     enable_prefix_caching: bool = True
     """Whether to enable prefix caching."""
     prefix_caching_hash_algo: PrefixCachingHashAlgo = "sha256"
-    """Set the hash algorithm for prefix caching:\n
-    - "sha256" uses Pickle for object serialization before hashing. This is the
-    current default, as SHA256 is the most secure choice to avoid potential
-    hash collisions.\n
+    """Set the hash algorithm for prefix caching:
+
+    - "sha256" uses Pickle for object serialization before hashing. This is the current
+      default, as SHA256 is the most secure choice to avoid potential hash collisions.
     - "sha256_cbor" provides a reproducible, cross-language compatible hash. It
-    serializes objects using canonical CBOR and hashes them with SHA-256.\n
+      serializes objects using canonical CBOR and hashes them with SHA-256.
     - "xxhash" uses Pickle serialization with xxHash (128-bit) for faster,
-    non-cryptographic hashing. Requires the optional ``xxhash`` package.
-    IMPORTANT: Use of a hashing algorithm that is not considered 
-    cryptographically secure theoretically increases the risk of hash collisions,
-    which can cause undefined behavior or even leak private information in
-    multi-tenant environments. Even if collisions are still very unlikely, it is
-    important to consider your security risk tolerance against the performance
-    benefits before turning this on.\n
+      non-cryptographic hashing. Requires the optional ``xxhash`` package.
+      IMPORTANT: Use of a hashing algorithm that is not considered cryptographically
+      secure theoretically increases the risk of hash collisions, which can cause
+      undefined behavior or even leak private information in multi-tenant environments.
+      Even if collisions are still very unlikely, it is important to consider your
+      security risk tolerance against the performance benefits before turning this on.
     - "xxhash_cbor" combines canonical CBOR serialization with xxHash for
-    reproducible hashing. Requires the optional ``xxhash`` package."""
+      reproducible hashing. Requires the optional ``xxhash`` package."""
     calculate_kv_scales: bool = False
     """Deprecated: This option is deprecated and will be removed in v0.19.
     It enables dynamic calculation of `k_scale` and `v_scale` when
diff --git a/vllm/config/kernel.py b/vllm/config/kernel.py
index 2ec18289d..4476cd125 100644
--- a/vllm/config/kernel.py
+++ b/vllm/config/kernel.py
@@ -32,14 +32,14 @@ class KernelConfig:
     moe_backend: MoEBackend = "auto"
     """Backend for MoE expert computation kernels. Available options:
 
-    - "auto": Automatically select the best backend based on model and hardware\n
-    - "triton": Use Triton-based fused MoE kernels\n
-    - "deep_gemm": Use DeepGEMM kernels (FP8 block-quantized only)\n
-    - "cutlass": Use vLLM CUTLASS kernels\n
-    - "flashinfer_trtllm": Use FlashInfer with TRTLLM-GEN kernels\n
-    - "flashinfer_cutlass": Use FlashInfer with CUTLASS kernels\n
-    - "flashinfer_cutedsl": Use FlashInfer with CuteDSL kernels (FP4 only)\n
-    - "marlin": Use Marlin kernels (weight-only quantization)\n
+    - "auto": Automatically select the best backend based on model and hardware
+    - "triton": Use Triton-based fused MoE kernels
+    - "deep_gemm": Use DeepGEMM kernels (FP8 block-quantized only)
+    - "cutlass": Use vLLM CUTLASS kernels
+    - "flashinfer_trtllm": Use FlashInfer with TRTLLM-GEN kernels
+    - "flashinfer_cutlass": Use FlashInfer with CUTLASS kernels
+    - "flashinfer_cutedsl": Use FlashInfer with CuteDSL kernels (FP4 only)
+    - "marlin": Use Marlin kernels (weight-only quantization)
     - "aiter": Use AMD AITer kernels (ROCm only)"""
 
     @field_validator("moe_backend", mode="before")
diff --git a/vllm/config/load.py b/vllm/config/load.py
index e77d9b378..93240ec5f 100644
--- a/vllm/config/load.py
+++ b/vllm/config/load.py
@@ -51,7 +51,7 @@ class LoadConfig:
     - "gguf" will load weights from GGUF format files (details specified in
       https://github.com/ggml-org/ggml/blob/master/docs/gguf.md).
     - "mistral" will load weights from consolidated safetensors files used by
-      Mistral models.\n
+      Mistral models.
     - Other custom values can be supported via plugins.
     """
     download_dir: str | None = None
diff --git a/vllm/config/model.py b/vllm/config/model.py
index 225ee119a..acb43a04b 100644
--- a/vllm/config/model.py
+++ b/vllm/config/model.py
@@ -125,26 +125,28 @@ class ModelConfig:
     """Name or path of the Hugging Face tokenizer to use. If unspecified, model
     name or path will be used."""
     tokenizer_mode: TokenizerMode | str = "auto"
-    """Tokenizer mode:\n
+    """Tokenizer mode:
+
     - "auto" will use the tokenizer from `mistral_common` for Mistral models
-    if available, otherwise it will use the "hf" tokenizer.\n
-    - "hf" will use the fast tokenizer if available.\n
-    - "slow" will always use the slow tokenizer.\n
-    - "mistral" will always use the tokenizer from `mistral_common`.\n
-    - "deepseek_v32" will always use the tokenizer from `deepseek_v32`.\n
-    - "qwen_vl" will always use the tokenizer from `qwen_vl`.\n
+      if available, otherwise it will use the "hf" tokenizer.
+    - "hf" will use the fast tokenizer if available.
+    - "slow" will always use the slow tokenizer.
+    - "mistral" will always use the tokenizer from `mistral_common`.
+    - "deepseek_v32" will always use the tokenizer from `deepseek_v32`.
+    - "qwen_vl" will always use the tokenizer from `qwen_vl`.
     - Other custom values can be supported via plugins."""
     trust_remote_code: bool = False
     """Trust remote code (e.g., from HuggingFace) when downloading the model
     and tokenizer."""
     dtype: ModelDType | torch.dtype = "auto"
-    """Data type for model weights and activations:\n
+    """Data type for model weights and activations:
+
     - "auto" will use FP16 precision for FP32 and FP16 models, and BF16
-    precision for BF16 models.\n
-    - "half" for FP16. Recommended for AWQ quantization.\n
-    - "float16" is the same as "half".\n
-    - "bfloat16" for a balance between precision and range.\n
-    - "float" is shorthand for FP32 precision.\n
+      precision for BF16 models.
+    - "half" for FP16. Recommended for AWQ quantization.
+    - "float16" is the same as "half".
+    - "bfloat16" for a balance between precision and range.
+    - "float" is shorthand for FP32 precision.
     - "float32" for FP32 precision."""
     seed: int = 0
     """Random seed for reproducibility.
@@ -182,13 +184,14 @@ class ModelConfig:
     automatically derived from the model config.
 
     When passing via `--max-model-len`, supports k/m/g/K/M/G in human-readable
-    format. Examples:\n
-    - 1k -> 1000\n
-    - 1K -> 1024\n
-    - 25.6k -> 25,600\n
+    format. Examples:
+
+    - 1k -> 1000
+    - 1K -> 1024
+    - 25.6k -> 25,600
     - -1 or 'auto' -> Automatically choose the maximum model length that fits in
-    GPU memory. This will use the model's maximum context length if it fits,
-    otherwise it will find the largest length that can be accommodated."""
+      GPU memory. This will use the model's maximum context length if it fits,
+      otherwise it will find the largest length that can be accommodated."""
     spec_target_max_model_len: int | None = None
     """Specify the maximum length for spec decoding draft models."""
     quantization: QuantizationMethods | str | None = None
@@ -248,10 +251,11 @@ class ModelConfig:
     prometheus metrics, if multiple names provided, metrics tag will take the
     first one."""
     config_format: str | ConfigFormat = "auto"
-    """The format of the model config to load:\n
+    """The format of the model config to load:
+
     - "auto" will try to load the config in hf format if available after trying
-    to load in mistral format.\n
-    - "hf" will load the config in hf format.\n
+      to load in mistral format.
+    - "hf" will load the config in hf format.
     - "mistral" will load the config in mistral format."""
     hf_token: bool | str | None = None
     """The token to use as HTTP bearer authorization for remote files . If
@@ -276,12 +280,12 @@ class ModelConfig:
     """Enable sleep mode for the engine (only cuda and
     hip platforms are supported)."""
     model_impl: str | ModelImpl = "auto"
-    """Which implementation of the model to use:\n
-    - "auto" will try to use the vLLM implementation, if it exists, and fall
-    back to the Transformers implementation if no vLLM implementation is
-    available.\n
-    - "vllm" will use the vLLM model implementation.\n
-    - "transformers" will use the Transformers model implementation.\n
+    """Which implementation of the model to use:
+
+    - "auto" will try to use the vLLM implementation, if it exists, and fall back to the
+      Transformers implementation if no vLLM implementation is available.
+    - "vllm" will use the vLLM model implementation.
+    - "transformers" will use the Transformers model implementation.
     - "terratorch" will use the TerraTorch model implementation.
     """
     override_attention_dtype: str | None = None
@@ -1512,10 +1516,11 @@ class ModelConfig:
     @property
     def score_type(self) -> ScoreType:
         """
-        Scoring API handles score/rerank for:\n
-        - "classify" task (score_type: cross-encoder models)\n
-        - "embed" task (score_type: bi-encoder models)\n
-        - "token_embed" task (score_type: late interaction models)\n
+        Scoring API handles score/rerank for:
+
+        - "classify" task (score_type: cross-encoder models)
+        - "embed" task (score_type: bi-encoder models)
+        - "token_embed" task (score_type: late interaction models)
         """
         # fixme: self._model_info.score_type is the score type before
         #  as_seq_cls_model, which is "bi-encoder", rather than the
@@ -1593,9 +1598,10 @@ class ModelConfig:
         such as the lm_head in a generation model,
         or the score or classifier in a classification model.
 
-        `head_dtype` currently only supports pooling models.\n
-        - The pooling model defaults to using fp32 head,
-        you can use --hf-overrides '{"head_dtype": "model"}' to disable it.
+        `head_dtype` currently only supports pooling models.
+
+        - The pooling model defaults to using fp32 head, you can use
+          --hf-overrides '{"head_dtype": "model"}' to disable it.
         """
 
         head_dtype = _get_head_dtype(
diff --git a/vllm/config/multimodal.py b/vllm/config/multimodal.py
index 1c9bc43b0..e66511c92 100644
--- a/vllm/config/multimodal.py
+++ b/vllm/config/multimodal.py
@@ -146,14 +146,14 @@ class MultiModalConfig:
     parallelism (TP).
 
     - `"weights"`: Within the same vLLM engine, split the weights of
-        each layer across TP ranks. (default TP behavior)\n
+      each layer across TP ranks. (default TP behavior)
     - `"data"`: Within the same vLLM engine, split the batched input data
-        across TP ranks to process the data in parallel, while hosting
-        the full weights on each TP rank.
-        This batch-level DP is not to be confused with API request-level
-        DP (which is controlled by `--data-parallel-size`).
-        This is only supported on a per-model basis and falls back to
-        `"weights"` if the encoder does not support DP."""
+      across TP ranks to process the data in parallel, while hosting
+      the full weights on each TP rank.
+      This batch-level DP is not to be confused with API request-level
+      DP (which is controlled by `--data-parallel-size`).
+      This is only supported on a per-model basis and falls back to
+      `"weights"` if the encoder does not support DP."""
     mm_encoder_attn_backend: AttentionBackendEnum | None = None
     """Optional override for the multi-modal encoder attention backend when
     using vision transformers. Accepts any value from
diff --git a/vllm/config/parallel.py b/vllm/config/parallel.py
index 7dd9c5bb5..8afff3af2 100644
--- a/vllm/config/parallel.py
+++ b/vllm/config/parallel.py
@@ -148,10 +148,11 @@ class ParallelConfig:
     eplb_config: EPLBConfig = Field(default_factory=EPLBConfig)
     """Expert parallelism configuration."""
     expert_placement_strategy: ExpertPlacementStrategy = "linear"
-    """The expert placement strategy for MoE layers:\n
+    """The expert placement strategy for MoE layers:
+
     - "linear": Experts are placed in a contiguous manner. For example, with 4
       experts and 2 ranks, rank 0 will have experts [0, 1] and rank 1 will have
-      experts [2, 3].\n
+      experts [2, 3].
     - "round_robin": Experts are placed in a round-robin manner. For example,
       with 4 experts and 2 ranks, rank 0 will have experts [0, 2] and rank 1
       will have experts [1, 3]. This strategy can help improve load balancing
@@ -159,11 +160,11 @@ class ParallelConfig:
     all2all_backend: All2AllBackend = "allgather_reducescatter"
     """All2All backend for MoE expert parallel communication. Available options:
 
-    - "allgather_reducescatter": All2all based on allgather and reducescatter\n
-    - "deepep_high_throughput": Use deepep high-throughput kernels\n
-    - "deepep_low_latency": Use deepep low-latency kernels\n
-    - "mori": Use mori kernels\n
-    - "nixl_ep": Use nixl-ep kernels\n
+    - "allgather_reducescatter": All2all based on allgather and reducescatter
+    - "deepep_high_throughput": Use deepep high-throughput kernels
+    - "deepep_low_latency": Use deepep low-latency kernels
+    - "mori": Use mori kernels
+    - "nixl_ep": Use nixl-ep kernels
     - "flashinfer_nvlink_two_sided": Use flashinfer two-sided kernels for mnnvl
     - "flashinfer_nvlink_one_sided": Use flashinfer high-throughput a2a kernels"""
 
diff --git a/vllm/config/profiler.py b/vllm/config/profiler.py
index e79e21310..68fa78854 100644
--- a/vllm/config/profiler.py
+++ b/vllm/config/profiler.py
@@ -37,7 +37,7 @@ class ProfilerConfig:
     profiler: ProfilerKind | None = None
     """Which profiler to use. Defaults to None. Options are:
 
-    - 'torch': Use PyTorch profiler.\n
+    - 'torch': Use PyTorch profiler.
     - 'cuda': Use CUDA profiler."""
 
     torch_profiler_dir: str = ""
diff --git a/vllm/config/scheduler.py b/vllm/config/scheduler.py
index f988c1086..3cd99bb08 100644
--- a/vllm/config/scheduler.py
+++ b/vllm/config/scheduler.py
@@ -106,11 +106,12 @@ class SchedulerConfig:
     max_num_batched_tokens in case max multimodal embedding size is larger."""
 
     policy: SchedulerPolicy = "fcfs"
-    """The scheduling policy to use:\n
-    - "fcfs" means first come first served, i.e. requests are handled in order
-    of arrival.\n
+    """The scheduling policy to use:
+
+    - "fcfs" means first come first served, i.e. requests are handled in order
+      of arrival.
     - "priority" means requests are handled based on given priority (lower
-    value means earlier handling) and time of arrival deciding any ties)."""
+      value means earlier handling), with time of arrival deciding any ties."""
 
     disable_chunked_mm_input: bool = False
     """If set to true and chunked prefill is enabled, we do not want to
diff --git a/vllm/config/utils.py b/vllm/config/utils.py
index 73abd7865..a953fcb46 100644
--- a/vllm/config/utils.py
+++ b/vllm/config/utils.py
@@ -37,10 +37,12 @@ ConfigT = TypeVar("ConfigT", bound=DataclassInstance)
 
 
 @overload
+@dataclass_transform(field_specifiers=(PydanticField,))
 def config(cls: type[ConfigT]) -> type[ConfigT]: ...
 
 
 @overload
+@dataclass_transform(field_specifiers=(PydanticField,))
 def config(
     *, config: ConfigDict | None = None, **kwargs: Any
 ) -> Callable[[type[ConfigT]], type[ConfigT]]: ...
diff --git a/vllm/utils/argparse_utils.py b/vllm/utils/argparse_utils.py
index e4482d4fb..c48edb68f 100644
--- a/vllm/utils/argparse_utils.py
+++ b/vllm/utils/argparse_utils.py
@@ -31,14 +31,12 @@ class SortedHelpFormatter(ArgumentDefaultsHelpFormatter, RawDescriptionHelpForma
     def _split_lines(self, text, width):
         """
         1. Sentences split across lines have their single newlines removed.
-        2. Paragraphs and explicit newlines are split into separate lines.
+        2. Paragraphs and list items (-, *, +, 1.) start on their own lines.
         3. Each line is wrapped to the specified width (width of terminal).
         """
-        # The patterns also include whitespace after the newline
-        single_newline = re.compile(r"(?<!\n)\n(?!\n)\s*")
-        multiple_newlines = re.compile(r"\n{2,}\s*")
-        text = single_newline.sub(" ", text)
-        lines = re.split(multiple_newlines, text)
+        # Lone newlines (and the indent after) become spaces, except before list items
+        newlines_to_remove = re.compile(r"(?<!\n)\n(?!\n)(?!\s*(-|\*|\+|\d+\.))\s*")
+        lines = newlines_to_remove.sub(" ", text).splitlines()
         return sum([textwrap.wrap(line, width) for line in lines], [])
 
     def add_arguments(self, actions):