[Refactor] [6/N] to simplify the vLLM openai chat_completion serving architecture (#32240)

Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
This commit is contained in:
Chauncey
2026-01-13 21:01:39 +08:00
committed by GitHub
parent a5bbbd2f24
commit fefce49807
128 changed files with 1221 additions and 1008 deletions

View File

@@ -9,7 +9,7 @@ from unittest.mock import patch
import pytest
-from vllm.entrypoints.openai.serving_chat_stream_harmony import (
+from vllm.entrypoints.openai.chat_completion.stream_harmony import (
extract_harmony_streaming_delta,
)
@@ -82,7 +82,7 @@ class TestExtractHarmonyStreamingDelta:
assert tools_streamed is False
@pytest.mark.parametrize("channel", ["commentary", "analysis"])
-    @patch("vllm.entrypoints.openai.serving_chat_stream_harmony.make_tool_call_id")
+    @patch("vllm.entrypoints.openai.chat_completion.stream_harmony.make_tool_call_id")
def test_new_tool_call(self, mock_make_tool_call_id, channel):
"""Test new tool call creation when recipient changes."""
mock_make_tool_call_id.return_value = "call_test123"