[Bugfix] Validate lora adapters to avoid crashing server (#11727)
Signed-off-by: Joe Runde <Joseph.Runde@ibm.com>
Co-authored-by: Jee Jee Li <pandaleefree@gmail.com>
@@ -52,7 +52,7 @@ async def _async_serving_chat_init():
     engine = MockEngine()
     model_config = await engine.get_model_config()
 
-    models = OpenAIServingModels(model_config, BASE_MODEL_PATHS)
+    models = OpenAIServingModels(engine, model_config, BASE_MODEL_PATHS)
     serving_completion = OpenAIServingChat(engine,
                                            model_config,
                                            models,
@@ -73,7 +73,8 @@ def test_serving_chat_should_set_correct_max_tokens():
     mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
     mock_engine.errored = False
 
-    models = OpenAIServingModels(base_model_paths=BASE_MODEL_PATHS,
+    models = OpenAIServingModels(engine_client=mock_engine,
+                                 base_model_paths=BASE_MODEL_PATHS,
                                  model_config=MockModelConfig())
     serving_chat = OpenAIServingChat(mock_engine,
                                      MockModelConfig(),
@@ -116,7 +117,8 @@ def test_serving_chat_could_load_correct_generation_config():
     mock_engine.errored = False
 
     # Initialize the serving chat
-    models = OpenAIServingModels(base_model_paths=BASE_MODEL_PATHS,
+    models = OpenAIServingModels(engine_client=mock_engine,
+                                 base_model_paths=BASE_MODEL_PATHS,
                                  model_config=mock_model_config)
     serving_chat = OpenAIServingChat(mock_engine,
                                      mock_model_config,
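For readers skimming the diff: the substantive change in these tests is that OpenAIServingModels now receives the engine client, so LoRA adapter requests can be checked against the running engine when they are registered instead of crashing the server later. A minimal sketch of the new call pattern, mirroring the updated tests above; the import paths, the BaseModelPath helper, and the MagicMock stand-ins are assumptions for illustration and are not part of this diff:

# Illustration only: import paths and BaseModelPath are assumed from the vLLM
# test setup and may differ between versions.
from unittest.mock import MagicMock

from vllm.entrypoints.openai.serving_models import (BaseModelPath,
                                                    OpenAIServingModels)

MODEL_NAME = "openai-community/gpt2"  # any served model name works here
BASE_MODEL_PATHS = [BaseModelPath(name=MODEL_NAME, model_path=MODEL_NAME)]

mock_engine = MagicMock()        # stands in for the async engine client
mock_model_config = MagicMock()  # stands in for the resolved model config

# New signature: the engine client is passed in so adapter loads can be
# validated up front rather than failing at request time.
models = OpenAIServingModels(engine_client=mock_engine,
                             base_model_paths=BASE_MODEL_PATHS,
                             model_config=mock_model_config)

Passing the engine client at construction time is what lets the models layer reject a bad adapter with an error response instead of letting it take the server down on first use, which is the behavior the commit title describes.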