[Frontend] Dynamic RoPE scaling (#4638)
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
import argparse
|
||||
import dataclasses
|
||||
import json
|
||||
from dataclasses import dataclass
|
||||
from typing import List, Optional, Tuple, Union
|
||||
|
||||
@@ -49,6 +50,7 @@ class EngineArgs:
|
||||
disable_log_stats: bool = False
|
||||
revision: Optional[str] = None
|
||||
code_revision: Optional[str] = None
|
||||
rope_scaling: Optional[dict] = None
|
||||
tokenizer_revision: Optional[str] = None
|
||||
quantization: Optional[str] = None
|
||||
enforce_eager: bool = False
|
||||
@@ -330,6 +332,11 @@ class EngineArgs:
|
||||
'None, we assume the model weights are not '
|
||||
'quantized and use `dtype` to determine the data '
|
||||
'type of the weights.')
|
||||
parser.add_argument('--rope-scaling',
|
||||
default=None,
|
||||
type=json.loads,
|
||||
help='RoPE scaling configuration in JSON format. '
|
||||
'For example, {"type":"dynamic","factor":2.0}')
|
||||
parser.add_argument('--enforce-eager',
|
||||
action='store_true',
|
||||
help='Always use eager-mode PyTorch. If False, '
|
||||
@@ -548,11 +555,12 @@ class EngineArgs:
|
||||
model_config = ModelConfig(
|
||||
self.model, self.tokenizer, self.tokenizer_mode,
|
||||
self.trust_remote_code, self.dtype, self.seed, self.revision,
|
||||
-self.code_revision, self.tokenizer_revision, self.max_model_len,
|
||||
-self.quantization, self.quantization_param_path,
|
||||
-self.enforce_eager, self.max_context_len_to_capture,
|
||||
-self.max_seq_len_to_capture, self.max_logprobs,
|
||||
-self.skip_tokenizer_init, self.served_model_name)
|
||||
+self.code_revision, self.rope_scaling, self.tokenizer_revision,
|
||||
+self.max_model_len, self.quantization,
|
||||
+self.quantization_param_path, self.enforce_eager,
|
||||
+self.max_context_len_to_capture, self.max_seq_len_to_capture,
|
||||
+self.max_logprobs, self.skip_tokenizer_init,
|
||||
+self.served_model_name)
|
||||
cache_config = CacheConfig(self.block_size,
|
||||
self.gpu_memory_utilization,
|
||||
self.swap_space, self.kv_cache_dtype,
|
||||
|
||||
Reference in New Issue
Block a user