[UX] Deduplicate sampling parameter startup logs (#32953)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Author: Cyrus Leung
Date: 2026-01-24 17:37:28 +08:00 (committed by GitHub)
Parent: 06b557ecd9
Commit: 51931c5c9a

4 changed files with 14 additions and 34 deletions


@@ -1339,10 +1339,9 @@ class ModelConfig:
         Returns:
             A dictionary containing the non-default sampling parameters.
         """
-        if self.generation_config == "vllm":
-            config = {}
-        else:
-            config = self.try_get_generation_config()
+        src = self.generation_config
+        config = {} if src == "vllm" else self.try_get_generation_config()
         # Overriding with given generation config
         config.update(self.override_generation_config)
@@ -1368,13 +1367,16 @@
         else:
             diff_sampling_param = {}
-        if diff_sampling_param:
+        if diff_sampling_param and src != "vllm":
             logger.warning_once(
-                "Default sampling parameters have been overridden by the "
-                "model's Hugging Face generation config recommended from the "
-                "model creator. If this is not intended, please relaunch "
-                "vLLM instance with `--generation-config vllm`."
+                "Default vLLM sampling parameters have been overridden by %s: `%s`. "
+                "If this is not intended, please relaunch vLLM instance "
+                "with `--generation-config vllm`.",
+                "the model's `generation_config.json`" if src == "auto" else src,
+                str(diff_sampling_param),
+                scope="local",
             )
         return diff_sampling_param
 
     @property
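
With this hunk, the override warning is emitted once from ModelConfig.get_diff_sampling_param() via logger.warning_once, so the chat, completion, and responses frontends below can drop their own startup logs. As a rough illustration of the deduplication idea, here is a minimal sketch of a "log once" helper in the same spirit; it is not vLLM's actual warning_once implementation, and the helper and logger names are made up for illustration.

# Minimal sketch (illustrative names, not vLLM's API): repeated calls with the
# same rendered message are suppressed, which is what lets the override warning
# live in ModelConfig without being re-emitted by every OpenAI-serving frontend
# that calls get_diff_sampling_param().
import logging

logger = logging.getLogger("sampling_defaults")
_emitted: set[str] = set()


def warning_once(msg: str, *args: object) -> None:
    """Emit a warning only the first time this exact message is rendered."""
    rendered = msg % args if args else msg
    if rendered in _emitted:
        return
    _emitted.add(rendered)
    logger.warning(rendered)


if __name__ == "__main__":
    logging.basicConfig(level=logging.WARNING)
    diff = {"temperature": 0.6, "top_p": 0.95}
    for _ in range(3):  # only the first iteration logs anything
        warning_once(
            "Default vLLM sampling parameters have been overridden by %s: `%s`. "
            "If this is not intended, please relaunch vLLM instance "
            "with `--generation-config vllm`.",
            "the model's `generation_config.json`",
            diff,
        )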


@@ -143,14 +143,6 @@ class OpenAIServingChat(OpenAIServing):
         self.enable_prompt_tokens_details = enable_prompt_tokens_details
         self.enable_force_include_usage = enable_force_include_usage
         self.default_sampling_params = self.model_config.get_diff_sampling_param()
-        if self.default_sampling_params:
-            source = self.model_config.generation_config
-            source = "model" if source == "auto" else source
-            logger.info(
-                "Using default chat sampling params from %s: %s",
-                source,
-                self.default_sampling_params,
-            )
         if self.model_config.hf_config.model_type == "kimi_k2":
             self.tool_call_id_type = "kimi_k2"
         else:


@@ -72,16 +72,9 @@ class OpenAIServingCompletion(OpenAIServing):
         self.logits_processors = self.model_config.logits_processors
         self.enable_prompt_tokens_details = enable_prompt_tokens_details
-        self.default_sampling_params = self.model_config.get_diff_sampling_param()
         self.enable_force_include_usage = enable_force_include_usage
-        if self.default_sampling_params:
-            source = self.model_config.generation_config
-            source = "model" if source == "auto" else source
-            logger.info(
-                "Using default completion sampling params from %s: %s",
-                source,
-                self.default_sampling_params,
-            )
+        self.default_sampling_params = self.model_config.get_diff_sampling_param()
 
     async def render_completion_request(
         self,


@@ -221,15 +221,8 @@ class OpenAIServingResponses(OpenAIServing):
         )
         self.enable_prompt_tokens_details = enable_prompt_tokens_details
         self.enable_force_include_usage = enable_force_include_usage
         self.default_sampling_params = self.model_config.get_diff_sampling_param()
-        if self.default_sampling_params:
-            source = self.model_config.generation_config
-            source = "model" if source == "auto" else source
-            logger.info(
-                "Using default chat sampling params from %s: %s",
-                source,
-                self.default_sampling_params,
-            )
         # If False (default), the "store" option is (silently) ignored and the
         # response is not stored. If True, the response is stored in memory.
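
For users who see the consolidated warning and want vLLM's built-in defaults rather than the model's `generation_config.json`, the flag referenced in the message can be passed at launch. A hedged usage sketch follows: the model name is only a placeholder, and it assumes the `generation_config` engine argument is also exposed through the offline LLM constructor.

# Offline API: ask vLLM to ignore the model's generation_config.json defaults.
# generation_config="vllm" mirrors the `--generation-config vllm` CLI flag
# mentioned in the warning; the model name below is just an example.
from vllm import LLM, SamplingParams

llm = LLM(model="Qwen/Qwen2.5-7B-Instruct", generation_config="vllm")
outputs = llm.generate(
    ["Explain sampling temperature in one sentence."],
    SamplingParams(temperature=0.8, top_p=0.95),
)
print(outputs[0].outputs[0].text)

# Online serving equivalent (shell):
#   vllm serve Qwen/Qwen2.5-7B-Instruct --generation-config vllm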