[Core][Bugfix][Perf] Introduce MQLLMEngine to avoid asyncio OH (#8157)
Co-authored-by: Nick Hill <nickhill@us.ibm.com>
Co-authored-by: rshaw@neuralmagic.com <rshaw@neuralmagic.com>
Co-authored-by: Robert Shaw <114415538+robertgshaw2-neuralmagic@users.noreply.github.com>
Co-authored-by: Simon Mo <simon.mo@hey.com>
This commit is contained in:
committed by
GitHub
parent
9d104b5beb
commit
7c7714d856
@@ -4,7 +4,7 @@ from unittest.mock import MagicMock
 
 import pytest
 
 from vllm.config import ModelConfig
-from vllm.engine.protocol import AsyncEngineClient
+from vllm.engine.protocol import EngineClient
 from vllm.entrypoints.openai.protocol import (ErrorResponse,
                                               LoadLoraAdapterRequest,
                                               UnloadLoraAdapterRequest)
@@ -18,7 +18,7 @@ LORA_UNLOADING_SUCCESS_MESSAGE = (
 
 
 async def _async_serving_engine_init():
-    mock_engine_client = MagicMock(spec=AsyncEngineClient)
+    mock_engine_client = MagicMock(spec=EngineClient)
     mock_model_config = MagicMock(spec=ModelConfig)
     # Set the max_model_len attribute to avoid missing attribute
    mock_model_config.max_model_len = 2048
Reference in New Issue
Block a user