[Frontend][Core] Override HF config.json via CLI (#5836)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Co-authored-by: DarkLight1337 <tlleungac@connect.ust.hk>
@@ -128,8 +128,9 @@ class EngineArgs:
     disable_log_stats: bool = False
     revision: Optional[str] = None
     code_revision: Optional[str] = None
-    rope_scaling: Optional[dict] = None
+    rope_scaling: Optional[Dict[str, Any]] = None
     rope_theta: Optional[float] = None
+    hf_overrides: Optional[Dict[str, Any]] = None
     tokenizer_revision: Optional[str] = None
     quantization: Optional[str] = None
     enforce_eager: Optional[bool] = None
@@ -140,8 +141,9 @@ class EngineArgs:
     # is intended for expert use only. The API may change without
     # notice.
     tokenizer_pool_type: Union[str, Type["BaseTokenizerGroup"]] = "ray"
-    tokenizer_pool_extra_config: Optional[dict] = None
+    tokenizer_pool_extra_config: Optional[Dict[str, Any]] = None
     limit_mm_per_prompt: Optional[Mapping[str, int]] = None
+    mm_processor_kwargs: Optional[Dict[str, Any]] = None
     enable_lora: bool = False
     max_loras: int = 1
     max_lora_rank: int = 16
@@ -187,7 +189,6 @@ class EngineArgs:
     collect_detailed_traces: Optional[str] = None
     disable_async_output_proc: bool = False
     override_neuron_config: Optional[Dict[str, Any]] = None
-    mm_processor_kwargs: Optional[Dict[str, Any]] = None
     scheduling_policy: Literal["fcfs", "priority"] = "fcfs"

     # Pooling configuration.
@@ -512,6 +513,12 @@ class EngineArgs:
                             help='RoPE theta. Use with `rope_scaling`. In '
                             'some cases, changing the RoPE theta improves the '
                             'performance of the scaled model.')
+        parser.add_argument('--hf-overrides',
+                            type=json.loads,
+                            default=EngineArgs.hf_overrides,
+                            help='Extra arguments for the HuggingFace config.'
+                            'This should be a JSON string that will be '
+                            'parsed into a dictionary.')
         parser.add_argument('--enforce-eager',
                             action='store_true',
                             help='Always use eager-mode PyTorch. If False, '
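
The new --hf-overrides flag takes a JSON string and relies on argparse's type=json.loads converter to turn it into a dictionary before it is stored on EngineArgs. A minimal sketch of that parsing step; the flag name and json.loads converter come from the diff above, while the standalone parser and the override payload are illustrative only:

    # Minimal sketch (not part of the commit): how a --hf-overrides value is
    # parsed into a plain Python dict via type=json.loads.
    import argparse
    import json

    parser = argparse.ArgumentParser()
    parser.add_argument('--hf-overrides', type=json.loads, default=None)

    # Hypothetical override payload; any valid JSON object works here.
    args = parser.parse_args(['--hf-overrides', '{"rope_theta": 1000000.0}'])
    assert args.hf_overrides == {"rope_theta": 1000000.0}
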
@@ -940,6 +947,7 @@ class EngineArgs:
             code_revision=self.code_revision,
             rope_scaling=self.rope_scaling,
             rope_theta=self.rope_theta,
+            hf_overrides=self.hf_overrides,
             tokenizer_revision=self.tokenizer_revision,
             max_model_len=self.max_model_len,
             quantization=self.quantization,
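
As the hunk at line 947 shows, the parsed dictionary is forwarded from EngineArgs into the model configuration, so the same overrides can also be supplied programmatically. A hypothetical usage sketch, assuming a vLLM build that includes this change; the model name and override key are illustrative and not taken from the commit:

    # Hypothetical usage sketch; only the hf_overrides field itself is
    # defined by the commit above.
    from vllm.engine.arg_utils import EngineArgs

    engine_args = EngineArgs(
        model="facebook/opt-125m",               # illustrative model name
        hf_overrides={"rope_theta": 1000000.0},  # illustrative override
    )
    # The dict is passed through to the model config (hunk at line 947),
    # mirroring what --hf-overrides does on the CLI.
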