[UX] Deduplicate sampling parameter startup logs (#32953)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -1339,10 +1339,9 @@ class ModelConfig:
|
|||||||
Returns:
|
Returns:
|
||||||
A dictionary containing the non-default sampling parameters.
|
A dictionary containing the non-default sampling parameters.
|
||||||
"""
|
"""
|
||||||
if self.generation_config == "vllm":
|
src = self.generation_config
|
||||||
config = {}
|
|
||||||
else:
|
config = {} if src == "vllm" else self.try_get_generation_config()
|
||||||
config = self.try_get_generation_config()
|
|
||||||
|
|
||||||
# Overriding with given generation config
|
# Overriding with given generation config
|
||||||
config.update(self.override_generation_config)
|
config.update(self.override_generation_config)
|
||||||
@@ -1368,13 +1367,16 @@ class ModelConfig:
|
|||||||
else:
|
else:
|
||||||
diff_sampling_param = {}
|
diff_sampling_param = {}
|
||||||
|
|
||||||
if diff_sampling_param:
|
if diff_sampling_param and src != "vllm":
|
||||||
logger.warning_once(
|
logger.warning_once(
|
||||||
"Default sampling parameters have been overridden by the "
|
"Default vLLM sampling parameters have been overridden by %s: `%s`. "
|
||||||
"model's Hugging Face generation config recommended from the "
|
"If this is not intended, please relaunch vLLM instance "
|
||||||
"model creator. If this is not intended, please relaunch "
|
"with `--generation-config vllm`.",
|
||||||
"vLLM instance with `--generation-config vllm`."
|
"the model's `generation_config.json`" if src == "auto" else src,
|
||||||
|
str(diff_sampling_param),
|
||||||
|
scope="local",
|
||||||
)
|
)
|
||||||
|
|
||||||
return diff_sampling_param
|
return diff_sampling_param
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
|||||||
@@ -143,14 +143,6 @@ class OpenAIServingChat(OpenAIServing):
|
|||||||
self.enable_prompt_tokens_details = enable_prompt_tokens_details
|
self.enable_prompt_tokens_details = enable_prompt_tokens_details
|
||||||
self.enable_force_include_usage = enable_force_include_usage
|
self.enable_force_include_usage = enable_force_include_usage
|
||||||
self.default_sampling_params = self.model_config.get_diff_sampling_param()
|
self.default_sampling_params = self.model_config.get_diff_sampling_param()
|
||||||
if self.default_sampling_params:
|
|
||||||
source = self.model_config.generation_config
|
|
||||||
source = "model" if source == "auto" else source
|
|
||||||
logger.info(
|
|
||||||
"Using default chat sampling params from %s: %s",
|
|
||||||
source,
|
|
||||||
self.default_sampling_params,
|
|
||||||
)
|
|
||||||
if self.model_config.hf_config.model_type == "kimi_k2":
|
if self.model_config.hf_config.model_type == "kimi_k2":
|
||||||
self.tool_call_id_type = "kimi_k2"
|
self.tool_call_id_type = "kimi_k2"
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -72,16 +72,9 @@ class OpenAIServingCompletion(OpenAIServing):
|
|||||||
self.logits_processors = self.model_config.logits_processors
|
self.logits_processors = self.model_config.logits_processors
|
||||||
|
|
||||||
self.enable_prompt_tokens_details = enable_prompt_tokens_details
|
self.enable_prompt_tokens_details = enable_prompt_tokens_details
|
||||||
self.default_sampling_params = self.model_config.get_diff_sampling_param()
|
|
||||||
self.enable_force_include_usage = enable_force_include_usage
|
self.enable_force_include_usage = enable_force_include_usage
|
||||||
if self.default_sampling_params:
|
|
||||||
source = self.model_config.generation_config
|
self.default_sampling_params = self.model_config.get_diff_sampling_param()
|
||||||
source = "model" if source == "auto" else source
|
|
||||||
logger.info(
|
|
||||||
"Using default completion sampling params from %s: %s",
|
|
||||||
source,
|
|
||||||
self.default_sampling_params,
|
|
||||||
)
|
|
||||||
|
|
||||||
async def render_completion_request(
|
async def render_completion_request(
|
||||||
self,
|
self,
|
||||||
|
|||||||
@@ -221,15 +221,8 @@ class OpenAIServingResponses(OpenAIServing):
|
|||||||
)
|
)
|
||||||
self.enable_prompt_tokens_details = enable_prompt_tokens_details
|
self.enable_prompt_tokens_details = enable_prompt_tokens_details
|
||||||
self.enable_force_include_usage = enable_force_include_usage
|
self.enable_force_include_usage = enable_force_include_usage
|
||||||
|
|
||||||
self.default_sampling_params = self.model_config.get_diff_sampling_param()
|
self.default_sampling_params = self.model_config.get_diff_sampling_param()
|
||||||
if self.default_sampling_params:
|
|
||||||
source = self.model_config.generation_config
|
|
||||||
source = "model" if source == "auto" else source
|
|
||||||
logger.info(
|
|
||||||
"Using default chat sampling params from %s: %s",
|
|
||||||
source,
|
|
||||||
self.default_sampling_params,
|
|
||||||
)
|
|
||||||
|
|
||||||
# If False (default), the "store" option is (silently) ignored and the
|
# If False (default), the "store" option is (silently) ignored and the
|
||||||
# response is not stored. If True, the response is stored in memory.
|
# response is not stored. If True, the response is stored in memory.
|
||||||
|
|||||||
Reference in New Issue
Block a user