diff --git a/vllm/config/model.py b/vllm/config/model.py
index a730aa8ad..f080803f4 100644
--- a/vllm/config/model.py
+++ b/vllm/config/model.py
@@ -1579,14 +1579,14 @@ class ModelConfig:
 
     @property
     def is_hybrid(self) -> bool:
+        if not self._model_info.is_hybrid:
+            return False
         # Handle granite-4.0-micro case which uses hybrid config but does not
         # actually contain any non-attention layers.
         layer_types = getattr(self.hf_config, "layer_types", None)
-        if layer_types is not None and all(
+        return layer_types is None or not all(
             layer == "attention" for layer in layer_types
-        ):
-            return False
-        return self._model_info.is_hybrid
+        )
 
     @property
     def has_noops(self) -> bool:
diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py
index e9eaaa492..fcc4ad826 100644
--- a/vllm/entrypoints/openai/serving_responses.py
+++ b/vllm/entrypoints/openai/serving_responses.py
@@ -2005,7 +2005,6 @@ class OpenAIServingResponses(OpenAIServing):
             return event
 
         async with AsyncExitStack() as exit_stack:
-            processer = None
             if self.use_harmony:
                 # TODO: in streaming, we noticed this bug:
                 # https://github.com/vllm-project/vllm/issues/25697
diff --git a/vllm/entrypoints/renderer.py b/vllm/entrypoints/renderer.py
index ca2e27fa4..79ba31dc0 100644
--- a/vllm/entrypoints/renderer.py
+++ b/vllm/entrypoints/renderer.py
@@ -44,11 +44,8 @@ class RenderConfig:
     def verify_truncate_prompt_tokens(self, model_config: ModelConfig) -> int | None:
         """Validate and normalize `truncate_prompt_tokens` parameter."""
         truncate_prompt_tokens = self.truncate_prompt_tokens
-        if truncate_prompt_tokens is None:
-            return None
-
-        if truncate_prompt_tokens == 0:
-            return 0
+        if truncate_prompt_tokens is None or truncate_prompt_tokens == 0:
+            return truncate_prompt_tokens
 
         if truncate_prompt_tokens < 0:
             truncate_prompt_tokens = model_config.max_model_len
diff --git a/vllm/inputs/preprocess.py b/vllm/inputs/preprocess.py
index 0372b06d0..43b5fa5ad 100644
--- a/vllm/inputs/preprocess.py
+++ b/vllm/inputs/preprocess.py
@@ -686,11 +686,7 @@ class InputPreprocessor:
         mm_uuids: MultiModalUUIDDict | None = None,
     ) -> ProcessorInputs:
         """Preprocess the input prompt."""
-        res = self._preprocess(
-            prompt,
-            tokenization_kwargs,
-            mm_uuids=mm_uuids,
-        )
+        res = self._preprocess(prompt, tokenization_kwargs, mm_uuids=mm_uuids)
 
         if self.mm_processor_cache and self.mm_cache_stats is not None:
             delta = self.mm_processor_cache.make_stats(delta=True)
diff --git a/vllm/multimodal/inputs.py b/vllm/multimodal/inputs.py
index 5617e04a8..afd782870 100644
--- a/vllm/multimodal/inputs.py
+++ b/vllm/multimodal/inputs.py
@@ -171,10 +171,7 @@ class PlaceholderRange:
 
     @cached_property
     def embeds_cumsum(self) -> torch.Tensor | None:
-        if self.is_embed is None:
-            return None
-
-        return self.is_embed.cumsum(dim=0)
+        return None if self.is_embed is None else self.is_embed.cumsum(dim=0)
 
     @cached_property
     def get_num_embeds(self) -> int:
@@ -308,13 +305,7 @@ def batched_tensors_equal(a: BatchedTensorInputs, b: BatchedTensorInputs) -> boo
     Equality check between
     [`BatchedTensorInputs`][vllm.multimodal.inputs.BatchedTensorInputs] objects.
     """
-    for k in a:
-        if k not in b:
-            return False
-        if not nested_tensors_equal(a[k], b[k]):
-            return False
-
-    return True
+    return all(k in b and nested_tensors_equal(a[k], b[k]) for k in a)
 
 
 @dataclass
diff --git a/vllm/v1/engine/core.py b/vllm/v1/engine/core.py
index 5f8883c16..7fbce2da8 100644
--- a/vllm/v1/engine/core.py
+++ b/vllm/v1/engine/core.py
@@ -487,10 +487,8 @@ class EngineCore:
         request_ids = []
         while not self.aborts_queue.empty():
             ids = self.aborts_queue.get_nowait()
-            if isinstance(ids, str):
-                # Should be a list here, but also handle string just in case.
-                ids = (ids,)
-            request_ids.extend(ids)
+            # Should be a list here, but also handle string just in case.
+            request_ids.extend((ids,) if isinstance(ids, str) else ids)
 
         # More efficient to abort all as a single batch.
         self.abort_requests(request_ids)
diff --git a/vllm/v1/engine/input_processor.py b/vllm/v1/engine/input_processor.py
index ea152446e..7cee1ead7 100644
--- a/vllm/v1/engine/input_processor.py
+++ b/vllm/v1/engine/input_processor.py
@@ -618,7 +618,7 @@ class InputProcessor:
         tokenizer = self.tokenizer
 
         if tokenizer is not None:
-            max_input_id = max(prompt_ids or [], default=0)
+            max_input_id = max(prompt_ids or (), default=0)
 
             # NOTE: tokenizer.max_token_id is the tokenizer’s vocab size while
             # self.model_config.get_vocab_size() is the model’s vocab size.
diff --git a/vllm/v1/engine/output_processor.py b/vllm/v1/engine/output_processor.py
index e8717e151..13b332533 100644
--- a/vllm/v1/engine/output_processor.py
+++ b/vllm/v1/engine/output_processor.py
@@ -339,10 +339,7 @@ class RequestState:
             stop_reason=stop_reason if finished else None,
         )
 
-    def _new_pooling_output(
-        self,
-        pooling_output: torch.Tensor,
-    ) -> PoolingOutput:
+    def _new_pooling_output(self, pooling_output: torch.Tensor) -> PoolingOutput:
         return PoolingOutput(data=pooling_output)
 
 
@@ -695,9 +692,7 @@ class OutputProcessor:
         assert req_state.stats is not None
         iteration_stats.update_from_finished_request(
             finish_reason=finish_reason,
-            num_prompt_tokens=length_from_prompt_token_ids_or_embeds(
-                req_state.prompt_token_ids, req_state.prompt_embeds
-            ),
+            num_prompt_tokens=req_state.prompt_len,
             max_tokens_param=req_state.max_tokens_param,
             req_stats=req_state.stats,
             num_cached_tokens=req_state.num_cached_tokens,
diff --git a/vllm/v1/executor/multiproc_executor.py b/vllm/v1/executor/multiproc_executor.py
index 649875fe8..7b5c28eeb 100644
--- a/vllm/v1/executor/multiproc_executor.py
+++ b/vllm/v1/executor/multiproc_executor.py
@@ -695,7 +695,7 @@ class WorkerProc:
         worker = None
         # tuple[Connection, Connection]
         reader, ready_writer = kwargs.pop("ready_pipe")
-        death_pipe = kwargs.pop("death_pipe", None)
+        death_pipe: Connection | None = kwargs.pop("death_pipe", None)
         shutdown_event = threading.Event()
         # Start death monitoring thread if death_pipe is provided
         if death_pipe is not None:
diff --git a/vllm/v1/request.py b/vllm/v1/request.py
index f33059b80..5aaef8eb6 100644
--- a/vllm/v1/request.py
+++ b/vllm/v1/request.py
@@ -211,8 +211,7 @@ class Request:
 
     def get_num_encoder_embeds(self, input_id: int) -> int:
         assert input_id < len(self.mm_features)
-        num_embeds = self.mm_features[input_id].mm_position.get_num_embeds
-        return num_embeds
+        return self.mm_features[input_id].mm_position.get_num_embeds
 
     def record_event(
         self,
diff --git a/vllm/v1/structured_output/__init__.py b/vllm/v1/structured_output/__init__.py
index c7655fe04..7fab7050c 100644
--- a/vllm/v1/structured_output/__init__.py
+++ b/vllm/v1/structured_output/__init__.py
@@ -1,6 +1,8 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+import itertools
 import multiprocessing
+from collections.abc import Iterable
 from concurrent.futures import Future, ThreadPoolExecutor
 from typing import TYPE_CHECKING
@@ -172,7 +174,7 @@ class StructuredOutputManager:
 
     def _fill_bitmasks(
         self,
-        batch: list[tuple[StructuredOutputGrammar, int, bool]],
+        batch: Iterable[tuple[StructuredOutputGrammar, int, bool]],
     ) -> None:
         assert self._grammar_bitmask is not None
         for grammar, index, apply_bitmask in batch:
@@ -265,16 +267,16 @@
             apply_bitmask = self.should_fill_bitmask(request)
 
             state_advancements = 0
-            req_tokens = scheduled_spec_decode_tokens.get(req_id, [])
-            for i, token in enumerate(req_tokens + [None]):
+            req_tokens = scheduled_spec_decode_tokens.get(req_id, ())
+            for token in itertools.chain(req_tokens, (None,)):
                 self._fill_bitmasks(
-                    [
+                    (
                         (
                             structured_output_request.grammar,
                             cumulative_index,
                             apply_bitmask,
-                        )
-                    ]
+                        ),
+                    )
                 )
 
                 if (
diff --git a/vllm/v1/structured_output/request.py b/vllm/v1/structured_output/request.py
index 94ae36a1a..b921a71b3 100644
--- a/vllm/v1/structured_output/request.py
+++ b/vllm/v1/structured_output/request.py
@@ -28,12 +28,9 @@ class StructuredOutputRequest:
         if sampling_params is None:
            return None
         params = sampling_params.structured_outputs
-        if params:
-            if params.all_constraints_none():
-                return None
-            else:
-                return StructuredOutputRequest(params=params)
-        return None
+        if not params or params.all_constraints_none():
+            return None
+        return StructuredOutputRequest(params=params)
 
     def _check_grammar_completion(self) -> bool:
         # NOTE: We have to lazy import to gate circular imports
diff --git a/vllm/v1/worker/gpu_input_batch.py b/vllm/v1/worker/gpu_input_batch.py
index 08b595845..14bbd6578 100644
--- a/vllm/v1/worker/gpu_input_batch.py
+++ b/vllm/v1/worker/gpu_input_batch.py
@@ -829,7 +829,7 @@ class InputBatch:
            presence_penalties=self.presence_penalties[:num_reqs],
            repetition_penalties=self.repetition_penalties[:num_reqs],
            output_token_ids=output_token_ids,
-            spec_token_ids=cast(list[list[int]], self.spec_token_ids),
+            spec_token_ids=self.spec_token_ids,
            no_penalties=self.no_penalties,
            allowed_token_ids_mask=allowed_token_ids_mask,
            bad_words_token_ids=self.bad_words_token_ids,
diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
index 17dfcae59..3a35e1b67 100644
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -1026,7 +1026,7 @@ class GPUModelRunner(
         each sequence, and a shifting is done during the next iteration based
         on the number of accepted tokens.
         """
-        if not self.model_config.is_hybrid or not self.speculative_config:
+        if not self.speculative_config or not self.model_config.is_hybrid:
             return
 
         # Find the number of accepted tokens for each sequence.