[UX] Deduplicate sampling parameter startup logs (#32953)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
@@ -1339,10 +1339,9 @@ class ModelConfig:
         Returns:
             A dictionary containing the non-default sampling parameters.
         """
-        if self.generation_config == "vllm":
-            config = {}
-        else:
-            config = self.try_get_generation_config()
+        src = self.generation_config
+
+        config = {} if src == "vllm" else self.try_get_generation_config()
 
         # Overriding with given generation config
         config.update(self.override_generation_config)
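The hunk above hoists the generation-config source into a local `src` so the warning emitted later in the same method (next hunk) can report where the override came from. A minimal sketch of the equivalent selection logic, with `try_get_generation_config` stubbed out purely for illustration:

    def try_get_generation_config(src: str) -> dict:
        # Hypothetical stand-in: in vLLM this reads the model's
        # generation_config.json and keeps its sampling-related fields.
        return {"temperature": 0.6, "top_p": 0.95}

    def select_default_config(generation_config: str) -> dict:
        src = generation_config  # "vllm", "auto", or a path to a config dir

        # Behaviorally equivalent to the replaced if/else: explicit "vllm"
        # means vLLM's neutral defaults (an empty dict), anything else pulls
        # the model's own defaults.
        return {} if src == "vllm" else try_get_generation_config(src)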
@@ -1368,13 +1367,16 @@ class ModelConfig:
         else:
             diff_sampling_param = {}
 
-        if diff_sampling_param:
+        if diff_sampling_param and src != "vllm":
             logger.warning_once(
-                "Default sampling parameters have been overridden by the "
-                "model's Hugging Face generation config recommended from the "
-                "model creator. If this is not intended, please relaunch "
-                "vLLM instance with `--generation-config vllm`."
+                "Default vLLM sampling parameters have been overridden by %s: `%s`. "
+                "If this is not intended, please relaunch vLLM instance "
+                "with `--generation-config vllm`.",
+                "the model's `generation_config.json`" if src == "auto" else src,
+                str(diff_sampling_param),
                 scope="local",
             )
 
         return diff_sampling_param
 
     @property
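The new `src != "vllm"` guard plus the `%s` placeholders are what make the startup message unambiguous: it now fires only when the defaults really come from outside vLLM, and it names both the source and the overridden values. A standalone sketch of that logic, with a plain `print` standing in for `logger.warning_once` (which in vLLM additionally deduplicates repeat occurrences):

    def warn_on_override(src: str, diff_sampling_param: dict) -> None:
        # Mirrors the new condition: stay silent when the user explicitly
        # asked for vLLM's own defaults or when nothing was overridden.
        if diff_sampling_param and src != "vllm":
            source = (
                "the model's `generation_config.json`" if src == "auto" else src
            )
            print(
                f"Default vLLM sampling parameters have been overridden by "
                f"{source}: `{diff_sampling_param}`. If this is not intended, "
                f"please relaunch vLLM instance with `--generation-config vllm`."
            )

    warn_on_override("vllm", {"temperature": 0.6})  # silent
    warn_on_override("auto", {"temperature": 0.6})  # names generation_config.json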
@@ -143,14 +143,6 @@ class OpenAIServingChat(OpenAIServing):
         self.enable_prompt_tokens_details = enable_prompt_tokens_details
         self.enable_force_include_usage = enable_force_include_usage
         self.default_sampling_params = self.model_config.get_diff_sampling_param()
-        if self.default_sampling_params:
-            source = self.model_config.generation_config
-            source = "model" if source == "auto" else source
-            logger.info(
-                "Using default chat sampling params from %s: %s",
-                source,
-                self.default_sampling_params,
-            )
         if self.model_config.hf_config.model_type == "kimi_k2":
             self.tool_call_id_type = "kimi_k2"
         else:
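With the single message now emitted from `ModelConfig.get_diff_sampling_param`, the chat frontend above (and, in the hunks below, the completion and responses frontends) only stores the defaults instead of logging its own copy of the same information. A hedged sketch of the resulting constructor pattern, with the serving class reduced to an illustrative stub:

    class ServingStub:
        """Illustrative stand-in for the OpenAI-compatible serving classes."""

        def __init__(self, model_config) -> None:
            # The serving layer now only keeps the non-default sampling
            # parameters; any logging about them happens once, inside
            # ModelConfig.get_diff_sampling_param(), not once per endpoint.
            self.default_sampling_params = model_config.get_diff_sampling_param()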
@@ -72,16 +72,9 @@ class OpenAIServingCompletion(OpenAIServing):
         self.logits_processors = self.model_config.logits_processors
 
         self.enable_prompt_tokens_details = enable_prompt_tokens_details
-        self.default_sampling_params = self.model_config.get_diff_sampling_param()
         self.enable_force_include_usage = enable_force_include_usage
-        if self.default_sampling_params:
-            source = self.model_config.generation_config
-            source = "model" if source == "auto" else source
-            logger.info(
-                "Using default completion sampling params from %s: %s",
-                source,
-                self.default_sampling_params,
-            )
+
+        self.default_sampling_params = self.model_config.get_diff_sampling_param()
 
     async def render_completion_request(
         self,
@@ -221,15 +221,8 @@ class OpenAIServingResponses(OpenAIServing):
         )
         self.enable_prompt_tokens_details = enable_prompt_tokens_details
         self.enable_force_include_usage = enable_force_include_usage
 
         self.default_sampling_params = self.model_config.get_diff_sampling_param()
-        if self.default_sampling_params:
-            source = self.model_config.generation_config
-            source = "model" if source == "auto" else source
-            logger.info(
-                "Using default chat sampling params from %s: %s",
-                source,
-                self.default_sampling_params,
-            )
 
         # If False (default), the "store" option is (silently) ignored and the
         # response is not stored. If True, the response is stored in memory.
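As the reworded warning suggests, the override can be disabled at launch with `--generation-config vllm`. Assuming the offline `LLM` entrypoint forwards a `generation_config` engine argument with the same accepted values (an assumption about the Python API, not something this diff touches), the equivalent opt-out looks roughly like:

    from vllm import LLM

    # Assumption: generation_config mirrors the --generation-config CLI flag
    # mentioned in the warning above ("auto", "vllm", or a path).
    llm = LLM(model="facebook/opt-125m", generation_config="vllm")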