From fca3f4665838605e268a8408bc7ca359f5d5c14b Mon Sep 17 00:00:00 2001
From: Benjamin Bartels <benjamin@bartels.dev>
Date: Thu, 4 Dec 2025 05:50:27 +0000
Subject: [PATCH] [Frontend] Fixes anthropic /v1/messages streaming not
 containing input_tokens on first chunk (#29971)

Signed-off-by: bbartels <benjamin@bartels.dev>
---
 tests/entrypoints/openai/test_messages.py      | 11 +++++++++++
 vllm/entrypoints/anthropic/serving_messages.py | 10 +++++++++-
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/tests/entrypoints/openai/test_messages.py b/tests/entrypoints/openai/test_messages.py
index 3e390ad49..b804a1a7a 100644
--- a/tests/entrypoints/openai/test_messages.py
+++ b/tests/entrypoints/openai/test_messages.py
@@ -69,9 +69,20 @@ async def test_anthropic_streaming(client: anthropic.AsyncAnthropic):
         stream=True,
     )
 
+    first_chunk = None
+    chunk_count = 0
     async for chunk in resp:
+        chunk_count += 1
+        if first_chunk is None and chunk.type == "message_start":
+            first_chunk = chunk
         print(chunk.model_dump_json())
 
+    assert chunk_count > 0
+    assert first_chunk is not None, "message_start chunk was never observed"
+    assert first_chunk.usage is not None, "first chunk should include usage stats"
+    assert first_chunk.usage["output_tokens"] == 0
+    assert first_chunk.usage["input_tokens"] > 5
+
 
 @pytest.mark.asyncio
 async def test_anthropic_tool_call(client: anthropic.AsyncAnthropic):
diff --git a/vllm/entrypoints/anthropic/serving_messages.py b/vllm/entrypoints/anthropic/serving_messages.py
index 340dabf0e..e7ea3bb59 100644
--- a/vllm/entrypoints/anthropic/serving_messages.py
+++ b/vllm/entrypoints/anthropic/serving_messages.py
@@ -183,7 +183,9 @@ class AnthropicServingMessages(OpenAIServingChat):
 
         if anthropic_request.stream:
             req.stream = anthropic_request.stream
-            req.stream_options = StreamOptions.validate({"include_usage": True})
+            req.stream_options = StreamOptions.validate(
+                {"include_usage": True, "continuous_usage_stats": True}
+            )
 
         if anthropic_request.tool_choice is None:
             req.tool_choice = None
@@ -323,6 +325,12 @@ class AnthropicServingMessages(OpenAIServingChat):
                                     content=[],
                                     model=origin_chunk.model,
                                 ),
+                                usage=AnthropicUsage(
+                                    input_tokens=origin_chunk.usage.prompt_tokens
+                                    if origin_chunk.usage
+                                    else 0,
+                                    output_tokens=0,
+                                ),
                             )
                             first_item = False
                             data = chunk.model_dump_json(exclude_unset=True)