diff --git a/tools/pre_commit/mypy.py b/tools/pre_commit/mypy.py
index 0a22494d0..cc275ae08 100755
--- a/tools/pre_commit/mypy.py
+++ b/tools/pre_commit/mypy.py
@@ -40,7 +40,6 @@ EXCLUDE = [
     "vllm/v1/attention/ops",
     # TODO: Remove these entries after fixing mypy errors.
     "vllm/benchmarks",
-    "vllm/config",
 ]
diff --git a/vllm/config/attention.py b/vllm/config/attention.py
index 85673f384..1da647a6d 100644
--- a/vllm/config/attention.py
+++ b/vllm/config/attention.py
@@ -56,7 +56,7 @@ class AttentionConfig:
         """
         from vllm.config.utils import get_hash_factors, hash_factors
 
-        ignored_factors: list[str] = []
+        ignored_factors: set[str] = set()
         factors = get_hash_factors(self, ignored_factors)
         return hash_factors(factors)
diff --git a/vllm/config/compilation.py b/vllm/config/compilation.py
index 86951c401..1c102582f 100644
--- a/vllm/config/compilation.py
+++ b/vllm/config/compilation.py
@@ -116,29 +116,29 @@ class PassConfig:
     """
 
     # New flags
-    fuse_norm_quant: bool = Field(default=None)
+    fuse_norm_quant: bool | None = Field(default=None)
     """Fuse the custom RMSNorm + quant ops."""
-    fuse_act_quant: bool = Field(default=None)
+    fuse_act_quant: bool | None = Field(default=None)
     """Fuse the custom SiluMul + quant ops."""
-    fuse_attn_quant: bool = Field(default=None)
+    fuse_attn_quant: bool | None = Field(default=None)
     """Fuse the custom attention + quant ops."""
     eliminate_noops: bool = Field(default=True)
     """Eliminate no-op ops."""
-    enable_sp: bool = Field(default=None)
+    enable_sp: bool | None = Field(default=None)
     """Enable sequence parallelism. Requires TP>1. Automatically disabled if
     the model's hidden_size is too small for SP to be beneficial (threshold
     is device-capability dependent)."""
-    fuse_gemm_comms: bool = Field(default=None)
+    fuse_gemm_comms: bool | None = Field(default=None)
     """Enable async TP."""
-    fuse_allreduce_rms: bool = Field(default=None)
+    fuse_allreduce_rms: bool | None = Field(default=None)
     """Enable flashinfer allreduce fusion."""
     enable_qk_norm_rope_fusion: bool = False
     """Enable fused Q/K RMSNorm + RoPE pass."""
 
     # ROCm/AITER specific fusions
-    fuse_act_padding: bool = Field(default=None)
+    fuse_act_padding: bool | None = Field(default=None)
     """Fuse the custom RMSNorm + padding ops."""
-    fuse_rope_kvcache: bool = Field(default=None)
+    fuse_rope_kvcache: bool | None = Field(default=None)
     """Fuse the QK rope + KV cache ops."""
 
     rope_kvcache_fusion_max_token_num: int = 256
@@ -198,9 +198,10 @@ class PassConfig:
         if not current_platform.is_cuda():
             return {}
 
-        return FI_ALLREDUCE_FUSION_MAX_SIZE_MB.get(
-            current_platform.get_device_capability().to_int(), {}
-        )
+        capability = current_platform.get_device_capability()
+        if capability is None:
+            return {}
+        return FI_ALLREDUCE_FUSION_MAX_SIZE_MB.get(capability.to_int(), {})
 
     def compute_hash(self) -> str:
         """
@@ -350,7 +351,7 @@ class DynamicShapesConfig:
 
         from vllm.config.utils import get_hash_factors, hash_factors
 
-        factors = get_hash_factors(self, {})
+        factors = get_hash_factors(self, set())
         return hash_factors(factors)
@@ -404,7 +405,7 @@ class CompilationConfig:
     """
 
     # Top-level Compilation control
-    mode: CompilationMode = Field(default=None)
+    mode: CompilationMode = Field(default=None)  # type: ignore[assignment]
     """The compilation approach used for torch.compile-based compilation of
     the model.
@@ -544,7 +545,7 @@ class CompilationConfig:
     constructor, e.g. `CompilationConfig(inductor_passes={"a": func})`."""
 
     # CudaGraph compilation
-    cudagraph_mode: CUDAGraphMode = Field(default=None)
+    cudagraph_mode: CUDAGraphMode = Field(default=None)  # type: ignore[assignment]
     """
     The mode of the cudagraph:
@@ -606,7 +607,7 @@ class CompilationConfig:
     When `enable_lora` is False, this option has no effect.
     """
 
-    use_inductor_graph_partition: bool = Field(default=None)
+    use_inductor_graph_partition: bool = Field(default=None)  # type: ignore[assignment]
     """Use inductor graph partition to split the graph at cudagraph_unsafe
     ops. This partition happens at inductor codegen time after all passes
     and fusions are finished. It generates a single `call` function which wraps
@@ -629,7 +630,7 @@ class CompilationConfig:
     pass_config: PassConfig = field(default_factory=PassConfig)
     """Custom inductor passes, see PassConfig for more details"""
 
-    max_cudagraph_capture_size: int = field(default=None)
+    max_cudagraph_capture_size: int | None = field(default=None)
     """The maximum cudagraph capture size.
 
     If cudagraph_capture_sizes is specified, this will be set to the largest
@@ -769,7 +770,9 @@ class CompilationConfig:
             exclude["pass_config"] = pass_config_exclude
 
         config = TypeAdapter(CompilationConfig).dump_python(
-            self, exclude=exclude, exclude_unset=True
+            self,
+            exclude=exclude,  # type: ignore[arg-type]
+            exclude_unset=True,
         )
 
         return str(config)
@@ -991,7 +994,7 @@ class CompilationConfig:
         - initialize compile_sizes
         """
 
-        computed_compile_sizes = []
+        computed_compile_sizes: list[int] = []
         if self.compile_sizes is not None:
             # de-duplicate the sizes provided by the config
             self.compile_sizes = list(set(self.compile_sizes))
@@ -1001,6 +1004,7 @@ class CompilationConfig:
                         "Unrecognized size type in compile_sizes, "
                         f"expect 'cudagraph_capture_sizes', got {x}"
                     )
+                assert self.cudagraph_capture_sizes is not None
                 computed_compile_sizes.extend(self.cudagraph_capture_sizes)
             else:
                 assert isinstance(x, int)
@@ -1008,6 +1012,7 @@ class CompilationConfig:
         self.compile_sizes = computed_compile_sizes  # type: ignore
 
         # make sure the sizes are in ascending order
+        assert self.cudagraph_capture_sizes is not None
         self.cudagraph_capture_sizes.sort()
         if self.cudagraph_capture_sizes:
             assert self.cudagraph_capture_sizes[-1] == self.max_cudagraph_capture_size
@@ -1099,6 +1104,7 @@ class CompilationConfig:
 
     def set_splitting_ops_for_attn_fusion(self):
         assert self.pass_config.fuse_attn_quant
+        assert self.cudagraph_mode is not None
         if self.splitting_ops is None:
             self.splitting_ops = []
             if self.cudagraph_mode.has_piecewise_cudagraphs():
@@ -1290,6 +1296,4 @@ class CompilationConfig:
         if self.compile_ranges_endpoints is None:
             return []
         endpoints = sorted(set(self.compile_ranges_endpoints))
-        return [
-            Range(start=s + 1, end=e) for s, e in zip([0] + endpoints[:-1], endpoints)
-        ]
+        return [Range(s + 1, e) for s, e in zip([0] + endpoints[:-1], endpoints)]
diff --git a/vllm/config/device.py b/vllm/config/device.py
index c20e4d0f2..bb689c9b3 100644
--- a/vllm/config/device.py
+++ b/vllm/config/device.py
@@ -13,8 +13,8 @@ from vllm.utils.hashing import safe_hash
 Device = Literal["auto", "cuda", "cpu", "tpu", "xpu"]
 
 
-@config(config=ConfigDict(arbitrary_types_allowed=True))
-class DeviceConfig:
+@config(config=ConfigDict(arbitrary_types_allowed=True))  # type: ignore[arg-type,misc]
+class DeviceConfig:  # type: ignore[misc]
     """Configuration for the device to use for vLLM execution."""
 
     device: SkipValidation[Device | torch.device | None] = "auto"
diff --git a/vllm/config/kernel.py b/vllm/config/kernel.py
index 3c08ef882..5e1c9109a 100644
--- a/vllm/config/kernel.py
+++ b/vllm/config/kernel.py
@@ -26,7 +26,7 @@ MoEBackend = Literal[
 class KernelConfig:
     """Configuration for kernel selection and warmup behavior."""
 
-    enable_flashinfer_autotune: bool = Field(default=None)
+    enable_flashinfer_autotune: bool | None = Field(default=None)
     """If True, run FlashInfer autotuning during kernel warmup."""
 
     moe_backend: MoEBackend = "auto"
diff --git a/vllm/config/kv_events.py b/vllm/config/kv_events.py
index 94da54c78..77ed5fabf 100644
--- a/vllm/config/kv_events.py
+++ b/vllm/config/kv_events.py
@@ -18,7 +18,7 @@ class KVEventsConfig:
     Events can be published externally by zmq using the event publisher config.
     """
 
-    publisher: Literal["null", "zmq"] = Field(default=None)
+    publisher: Literal["null", "zmq"] | None = Field(default=None)
     """The publisher to use for publishing kv events. Can be "null", "zmq".
     """
diff --git a/vllm/config/lora.py b/vllm/config/lora.py
index bfef0efa3..696e92df7 100644
--- a/vllm/config/lora.py
+++ b/vllm/config/lora.py
@@ -25,8 +25,8 @@ MaxLoRARanks = Literal[1, 8, 16, 32, 64, 128, 256, 320, 512]
 LoRAExtraVocabSize = Literal[256, 512]
 
 
-@config(config=ConfigDict(arbitrary_types_allowed=True))
-class LoRAConfig:
+@config(config=ConfigDict(arbitrary_types_allowed=True))  # type: ignore[arg-type,misc]
+class LoRAConfig:  # type: ignore[misc]
     """Configuration for LoRA."""
 
     max_lora_rank: MaxLoRARanks = 16
diff --git a/vllm/config/model.py b/vllm/config/model.py
index 19787f80b..122d5eabd 100644
--- a/vllm/config/model.py
+++ b/vllm/config/model.py
@@ -93,7 +93,7 @@ LayerBlockType = Literal["attention", "linear_attention", "mamba"]
 
 _RUNNER_CONVERTS: dict[RunnerType, list[ConvertType]] = {
     "generate": [],
-    "pooling": ["embed", "classify", "reward"],
+    "pooling": ["embed", "classify"],
     "draft": [],
 }
@@ -102,8 +102,8 @@ AttnTypeStr = Literal[
 ]
 
 
-@config(config=ConfigDict(arbitrary_types_allowed=True))
-class ModelConfig:
+@config(config=ConfigDict(arbitrary_types_allowed=True))  # type: ignore[arg-type,misc]
+class ModelConfig:  # type: ignore[misc]
     """Configuration for the model."""
 
     model: str = "Qwen/Qwen3-0.6B"
@@ -121,7 +121,7 @@ class ModelConfig:
     """Convert the model using adapters defined in
     [vllm.model_executor.models.adapters][]. The most common use case is to
     adapt a text generation model to be used for pooling tasks."""
-    tokenizer: str = Field(default=None)
+    tokenizer: str = Field(default=None)  # type: ignore[assignment]
     """Name or path of the Hugging Face tokenizer to use. If unspecified,
     model name or path will be used."""
     tokenizer_mode: TokenizerMode | str = "auto"
@@ -177,7 +177,7 @@ class ModelConfig:
     """The specific revision to use for the tokenizer on the Hugging Face
     Hub. It can be a branch name, a tag name, or a commit id. If unspecified,
     will use the default version."""
-    max_model_len: int = Field(default=None, ge=-1)
+    max_model_len: int = Field(default=None, ge=-1)  # type: ignore[assignment]
     """Model context length (prompt and output). If unspecified, will be
     automatically derived from the model config.
@@ -454,7 +454,7 @@ class ModelConfig:
         self.hf_config_path = maybe_model_redirect(self.hf_config_path)
 
         if callable(self.hf_overrides):
-            hf_overrides_kw = {}
+            hf_overrides_kw: dict[str, Any] = {}
             hf_overrides_fn = self.hf_overrides
             dict_overrides: dict[str, Any] = {}
         else:
@@ -582,7 +582,7 @@ class ModelConfig:
             self.dtype,
             is_pooling_model=self.runner_type == "pooling",
             revision=self.revision,
-            config_format=self.config_format,
+            config_format=self.config_format,  # type: ignore[arg-type]
         )
 
         self.original_max_model_len = self.max_model_len
@@ -626,7 +626,7 @@ class ModelConfig:
                 k: v for k, v in mm_config_kwargs.items() if v is not None
             }
 
-            self.multimodal_config = MultiModalConfig(**mm_config_kwargs)
+            self.multimodal_config = MultiModalConfig(**mm_config_kwargs)  # type: ignore[arg-type]
 
         # Multimodal GGUF models must use original repo for mm processing
         if is_gguf(self.tokenizer) and self.is_multimodal_model:
@@ -732,7 +732,7 @@ class ModelConfig:
 
     @property
     def architectures(self) -> list[str]:
-        return self.model_arch_config.architectures
+        return self.model_arch_config.architectures  # type: ignore[return-value]
 
     @property
     def architecture(self) -> str:
@@ -1004,7 +1004,7 @@ class ModelConfig:
         is_bitsandbytes = self.quantization == "bitsandbytes"
         has_quantization_config = self.model_arch_config.quantization_config is not None
         is_8bit = (
-            self.model_arch_config.quantization_config.get("load_in_8bit", False)
+            self.model_arch_config.quantization_config.get("load_in_8bit", False)  # type: ignore[union-attr]
             if has_quantization_config
             else False
         )
@@ -1292,6 +1292,7 @@ class ModelConfig:
                 "attn_type_list, or a layer_types in the hf_config, "
                 f"cannot determine the num of {block_type} layers"
             )
+        raise AssertionError(f"Unsupported block type: {block_type}")
 
     def get_mamba_chunk_size(self) -> int | None:
         """
diff --git a/vllm/config/pooler.py b/vllm/config/pooler.py
index 841260e27..63aa1220b 100644
--- a/vllm/config/pooler.py
+++ b/vllm/config/pooler.py
@@ -108,14 +108,14 @@ class PoolerConfig:
                 pooling_type,
                 pooling_type,
             )
-            self.seq_pooling_type = pooling_type
+            self.seq_pooling_type = pooling_type  # type: ignore[assignment]
         elif pooling_type in TOK_POOLING_TYPES:
             logger.debug(
                 "Resolved `pooling_type=%r` to `tok_pooling_type=%r`.",
                 pooling_type,
                 pooling_type,
             )
-            self.tok_pooling_type = pooling_type
+            self.tok_pooling_type = pooling_type  # type: ignore[assignment]
         else:
             raise NotImplementedError(pooling_type)
diff --git a/vllm/config/scheduler.py b/vllm/config/scheduler.py
index 584080ae1..0d2336186 100644
--- a/vllm/config/scheduler.py
+++ b/vllm/config/scheduler.py
@@ -173,7 +173,7 @@ class SchedulerConfig:
             logger.warning_once(
                 "Using custom scheduler class %s. This scheduler interface is "
                 "not public and compatibility may not be maintained.",
-                self.scheduler_cls,
+                self.scheduler_cls,  # type: ignore[arg-type]
             )
         if not isinstance(self.scheduler_cls, str):
             return cast(type["SchedulerInterface"], self.scheduler_cls)
diff --git a/vllm/config/speculative.py b/vllm/config/speculative.py
index 8ff6d9753..e9dc4cac5 100644
--- a/vllm/config/speculative.py
+++ b/vllm/config/speculative.py
@@ -67,7 +67,7 @@ class SpeculativeConfig:
     enforce_eager: bool | None = None
     """Override the default enforce_eager from model_config"""
 
     # General speculative decoding control
-    num_speculative_tokens: int = Field(default=None, gt=0)
+    num_speculative_tokens: int = Field(default=None, gt=0)  # type: ignore[assignment]
     """The number of speculative tokens, if provided. It will default to the
     number in the draft model config if present, otherwise, it is required."""
     model: str | None = None
@@ -89,7 +89,7 @@ class SpeculativeConfig:
     warn users when they mistakenly provide the wrong argument."""
 
     # Draft model configuration
-    quantization: me_quant.QuantizationMethods | None = None
+    quantization: me_quant.QuantizationMethods | str | None = None
     """Quantization method that was used to quantize the draft model weights.
     If `None`, we assume the model weights are not quantized. Note that it only
     takes effect when using the draft model-based speculative method."""
diff --git a/vllm/config/utils.py b/vllm/config/utils.py
index c6fca2f93..7ae9c0c24 100644
--- a/vllm/config/utils.py
+++ b/vllm/config/utils.py
@@ -11,13 +11,13 @@ import os
 import pathlib
 import textwrap
 from collections.abc import Callable, Mapping, Sequence, Set
-from dataclasses import MISSING, field, fields, is_dataclass
+from dataclasses import MISSING, dataclass, field, fields, is_dataclass
 from itertools import pairwise
 from typing import TYPE_CHECKING, Any, Protocol, TypeVar, cast
 
 import torch
 from pydantic import ConfigDict
-from pydantic.dataclasses import dataclass
+from pydantic.dataclasses import dataclass as pydantic_dataclass
 from pydantic.fields import Field as PydanticField
 from pydantic.fields import FieldInfo
 from typing_extensions import dataclass_transform, runtime_checkable
@@ -58,8 +58,8 @@ def config(
     if config is not None:
         merged_config.update(config)
 
-    def decorator(cls):
-        return dataclass(cls, config=merged_config, **kwargs)
+    def decorator(cls: type[ConfigT]) -> type[ConfigT]:
+        return pydantic_dataclass(cls, config=merged_config, **kwargs)  # type: ignore[return-value]
 
     # Called with arguments: @config(config=...)
     if cls is None:
diff --git a/vllm/config/vllm.py b/vllm/config/vllm.py
index a178a8f54..8ff8f79b9 100644
--- a/vllm/config/vllm.py
+++ b/vllm/config/vllm.py
@@ -243,15 +243,15 @@ OPTIMIZATION_LEVEL_TO_CONFIG = {
 }
 
 
-@config(config=ConfigDict(arbitrary_types_allowed=True))
-class VllmConfig:
+@config(config=ConfigDict(arbitrary_types_allowed=True))  # type: ignore[arg-type,misc]
+class VllmConfig:  # type: ignore[misc]
     """Dataclass which contains all vllm-related configuration. This
     simplifies passing around the distinct configurations in the codebase.
     """
 
     # TODO: use default_factory once default constructing ModelConfig doesn't
     # try to download a model
-    model_config: ModelConfig = Field(default=None)
+    model_config: ModelConfig = Field(default=None)  # type: ignore[assignment]
     """Model configuration."""
     cache_config: CacheConfig = Field(default_factory=CacheConfig)
     """Cache configuration."""
@@ -883,7 +883,7 @@ class VllmConfig:
 
             tp_size = self.parallel_config.tensor_parallel_size
             hidden_size = self.model_config.get_hidden_size()
-            element_size = self.model_config.dtype.itemsize
+            element_size = self.model_config.dtype.itemsize  # type: ignore[union-attr]
             pass_config.sp_min_token_num = get_sequence_parallelism_threshold(
                 hidden_size, tp_size, element_size
             )
@@ -1061,7 +1061,7 @@ class VllmConfig:
 
         is_fullgraph = (
            self.compilation_config.use_inductor_graph_partition
-            or len(self.compilation_config.splitting_ops) == 0
+            or len(self.compilation_config.splitting_ops or []) == 0
         )
         if self.parallel_config.pipeline_parallel_size > 1 or not is_fullgraph:
             if "-rms_norm" not in self.compilation_config.custom_ops:
@@ -1216,7 +1216,7 @@ class VllmConfig:
             )
             self.compilation_config.debug_dump_path = env_path
 
-        def has_blocked_weights():
+        def has_blocked_weights():  # type: ignore[no-redef]
             if self.quant_config is not None:
                 if hasattr(self.quant_config, "weight_block_size"):
                     return self.quant_config.weight_block_size is not None
@@ -1474,7 +1474,7 @@ class VllmConfig:
             if max_size is not None:
                 max_token_num = max_size // (
                     self.model_config.get_hidden_size()
-                    * self.model_config.dtype.itemsize
+                    * self.model_config.dtype.itemsize  # type: ignore[union-attr]
                 )
                 if compile_range_end is not None and max_token_num < compile_range_end:
                     computed_compile_ranges_endpoints.append(max_token_num)
@@ -1497,7 +1497,7 @@ class VllmConfig:
 
             tp_size = self.parallel_config.tensor_parallel_size
             hidden_size = self.model_config.get_hidden_size()
-            element_size = self.model_config.dtype.itemsize
+            element_size = self.model_config.dtype.itemsize  # type: ignore[union-attr]
             pass_config.sp_min_token_num = get_sequence_parallelism_threshold(
                 hidden_size, tp_size, element_size
             )
diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index e344bae26..e0d5236bc 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -1924,7 +1924,7 @@ class EngineArgs:
         )
 
         offload_config = OffloadConfig(
-            offload_backend=self.offload_backend,
+            offload_backend=self.offload_backend,  # type: ignore[arg-type]
             uva=UVAOffloadConfig(
                 cpu_offload_gb=self.cpu_offload_gb,
                 cpu_offload_params=self.cpu_offload_params,
diff --git a/vllm/v1/cudagraph_dispatcher.py b/vllm/v1/cudagraph_dispatcher.py
index 701c97d6d..e27b5ee38 100644
--- a/vllm/v1/cudagraph_dispatcher.py
+++ b/vllm/v1/cudagraph_dispatcher.py
@@ -72,6 +72,9 @@ class CudagraphDispatcher:
         """Pre-compute the mapping from batch size to padded graph size."""
         max_size = self.compilation_config.max_cudagraph_capture_size
         capture_sizes = self.compilation_config.cudagraph_capture_sizes
+        assert max_size is not None, (
+            "Maximum cudagraph capture size must be set when cudagraphs are enabled."
+        )
         assert capture_sizes is not None, (
             "Cudagraph capture sizes must be set when cudagraphs are enabled."
         )
@@ -94,7 +97,7 @@ class CudagraphDispatcher:
         ):
             for size in self.compilation_config.compile_sizes:
                 size = int(size)
-                if size <= self.compilation_config.max_cudagraph_capture_size:
+                if size <= max_size:
                     padded = self._bs_to_padded_graph_size[size]
                     if padded != size:
                         raise ValueError(
@@ -265,11 +268,13 @@ class CudagraphDispatcher:
                 f"No allowed cudagraph modes: valid_modes={valid_modes}, "
                 f"invalid_modes={invalid_modes}"
             )
 
+        max_size = self.compilation_config.max_cudagraph_capture_size
         if (
             not self.keys_initialized
             or self.cudagraph_mode == CUDAGraphMode.NONE
-            or num_tokens > self.compilation_config.max_cudagraph_capture_size
+            or max_size is None
+            or num_tokens > max_size
             or allowed_modes <= {CUDAGraphMode.NONE}
         ):
             return CUDAGraphMode.NONE, BatchDescriptor(num_tokens)
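
Note on the recurring pattern in this diff: fields declared with a narrow type but `Field(default=None)` work at runtime because pydantic does not validate defaults unless `validate_default=True`, so the diff either widens the annotation to `... | None` or keeps the narrow annotation behind `# type: ignore[assignment]` and adds a narrowing `assert` before use. The sketch below is illustrative only, not vLLM code; the class and field names are hypothetical.

```python
from pydantic import Field
from pydantic.dataclasses import dataclass


@dataclass
class ExampleConfig:
    # Option 1: widen the annotation. mypy is satisfied directly, but every
    # reader of the field must now handle None explicitly.
    fuse_norm_quant: bool | None = Field(default=None)

    # Option 2: keep the narrow annotation. The runtime default is still None
    # (pydantic skips validation of defaults), so the assignment error is
    # silenced and the value is assumed to be filled in before it is read.
    max_capture_size: int = Field(default=None)  # type: ignore[assignment]

    def padded_size(self, num_tokens: int) -> int:
        # Narrowing assert, mirroring the `assert ... is not None` guards
        # this diff adds before sorting or comparing such fields.
        assert self.max_capture_size is not None
        return min(num_tokens, self.max_capture_size)


cfg = ExampleConfig()          # fuse_norm_quant=None, max_capture_size=None
cfg.max_capture_size = 8192    # resolved later, e.g. during config finalization
print(cfg.padded_size(10000))  # 8192
```

Option 1 pushes the None-handling to call sites (hence the new `capability is None` and `splitting_ops or []` guards), while option 2 keeps call sites unchanged at the cost of per-use asserts; the diff uses whichever is less invasive for each field.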