[V0 Deprecation] Remove MQLLMEngine (#25019)
Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu> Signed-off-by: Woosuk Kwon <woosuk@thinkingmachines.ai>
This commit is contained in:
@@ -13,12 +13,12 @@ import pytest
 
 import pytest_asyncio
 
 from vllm.config.multimodal import MultiModalConfig
-from vllm.engine.multiprocessing.client import MQLLMEngineClient
 from vllm.entrypoints.openai.protocol import ChatCompletionRequest
 from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
 from vllm.entrypoints.openai.serving_models import (BaseModelPath,
                                                     OpenAIServingModels)
 from vllm.transformers_utils.tokenizer import get_tokenizer
+from vllm.v1.engine.async_llm import AsyncLLM
 
 from ...utils import RemoteOpenAIServer
@@ -276,7 +276,7 @@ def test_async_serving_chat_init():
 
 
 @pytest.mark.asyncio
 async def test_serving_chat_returns_correct_model_name():
-    mock_engine = MagicMock(spec=MQLLMEngineClient)
+    mock_engine = MagicMock(spec=AsyncLLM)
     mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
     mock_engine.errored = False
||||
@@ -312,7 +312,7 @@ async def test_serving_chat_returns_correct_model_name():
 
 
 @pytest.mark.asyncio
 async def test_serving_chat_should_set_correct_max_tokens():
-    mock_engine = MagicMock(spec=MQLLMEngineClient)
+    mock_engine = MagicMock(spec=AsyncLLM)
     mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
     mock_engine.errored = False
||||
@@ -355,7 +355,7 @@ async def test_serving_chat_should_set_correct_max_tokens():
     }
 
     # Reinitialize the engine with new settings
-    mock_engine = MagicMock(spec=MQLLMEngineClient)
+    mock_engine = MagicMock(spec=AsyncLLM)
     mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
     mock_engine.errored = False
||||
@@ -410,7 +410,7 @@ async def test_serving_chat_should_set_correct_max_tokens():
     }
 
     # Reinitialize the engine with new settings
-    mock_engine = MagicMock(spec=MQLLMEngineClient)
+    mock_engine = MagicMock(spec=AsyncLLM)
     mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
     mock_engine.errored = False
||||
@@ -467,7 +467,7 @@ async def test_serving_chat_could_load_correct_generation_config():
         "repetition_penalty": 1.05
     }
 
-    mock_engine = MagicMock(spec=MQLLMEngineClient)
+    mock_engine = MagicMock(spec=AsyncLLM)
     mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
     mock_engine.errored = False
||||
@@ -523,7 +523,7 @@ async def test_serving_chat_did_set_correct_cache_salt(model_type):
     mock_model_config = MockModelConfig()
     mock_model_config.hf_config.model_type = model_type
 
-    mock_engine = MagicMock(spec=MQLLMEngineClient)
+    mock_engine = MagicMock(spec=AsyncLLM)
     mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
     mock_engine.errored = False
Reference in New Issue
Block a user