[Frontend] Support override generation config in args (#12409)

Signed-off-by: liuyanyi <wolfsonliu@163.com>
This commit is contained in:
Yanyi Liu
2025-01-29 17:41:01 +08:00
committed by GitHub
parent d93bf4da85
commit ff7424f491
3 changed files with 100 additions and 8 deletions

View File

@@ -165,6 +165,8 @@ class ModelConfig:
`logits_processors` extra completion argument. Defaults to None,
which allows no processors.
generation_config: Configuration parameter file for generation.
+ override_generation_config: Override the generation config with the
+ given config.
"""
def compute_hash(self) -> str:
@@ -225,6 +227,7 @@ class ModelConfig:
logits_processor_pattern: Optional[str] = None,
generation_config: Optional[str] = None,
enable_sleep_mode: bool = False,
+ override_generation_config: Optional[Dict[str, Any]] = None,
) -> None:
self.model = model
self.tokenizer = tokenizer
@@ -368,6 +371,7 @@ class ModelConfig:
self.logits_processor_pattern = logits_processor_pattern
self.generation_config = generation_config
+ self.override_generation_config = override_generation_config or {}
self._verify_quantization()
self._verify_cuda_graph()
@@ -904,8 +908,13 @@ class ModelConfig:
"""
if self.generation_config is None:
# When generation_config is not set
- return {}
- config = self.try_get_generation_config()
+ config = {}
+ else:
+ config = self.try_get_generation_config()
+ # Overriding with given generation config
+ config.update(self.override_generation_config)
available_params = [
"repetition_penalty",
"temperature",