Export NaNs in logits to scheduler_stats if output is corrupted (#18777)

Signed-off-by: Vlad Mihailescu <vtmihailescu@gmail.com>
2025-06-20 07:47:16 -07:00
parent 7e8977fcd4
commit 2e3e3c86dc
7 changed files with 104 additions and 2 deletions
--- a/vllm/v1/core/sched/scheduler.py
+++ b/vllm/v1/core/sched/scheduler.py
@@ -717,6 +717,7 @@ class Scheduler(SchedulerInterface):
        prompt_logprobs_dict = model_runner_output.prompt_logprobs_dict
        num_scheduled_tokens = scheduler_output.num_scheduled_tokens
        pooler_outputs = model_runner_output.pooler_output
+        num_nans_in_logits = model_runner_output.num_nans_in_logits

        new_running: list[Request] = []
        outputs: dict[int, list[EngineCoreOutput]] = defaultdict(list)
@@ -810,6 +811,10 @@ class Scheduler(SchedulerInterface):
                request.structured_output_request.grammar.accept_tokens(  # type: ignore[union-attr]
                    req_id, new_token_ids)

+            # Record the per-request NaN count reported by the model runner.
+            if num_nans_in_logits is not None and req_id in num_nans_in_logits:
+                request.num_nans_in_logits = num_nans_in_logits[req_id]
+
            # Add newly generated spec token ids to the request.
            if spec_token_ids is not None:
                if self.structured_output_manager.should_advance(request):
@@ -972,6 +977,8 @@ class Scheduler(SchedulerInterface):
            kv_cache_usage=self.kv_cache_manager.usage,
            prefix_cache_stats=prefix_cache_stats,
            spec_decoding_stats=spec_decoding_stats,
+            num_corrupted_reqs=sum(req.is_output_corrupted
+                                   for req in self.running),
        )

    def make_spec_decoding_stats(