[Frontend] Customizable RoPE theta (#5197)
@@ -53,6 +53,7 @@ class EngineArgs:
     revision: Optional[str] = None
     code_revision: Optional[str] = None
     rope_scaling: Optional[dict] = None
+    rope_theta: Optional[float] = None
     tokenizer_revision: Optional[str] = None
     quantization: Optional[str] = None
     enforce_eager: bool = False
@@ -400,6 +401,12 @@ class EngineArgs:
                             type=json.loads,
                             help='RoPE scaling configuration in JSON format. '
                             'For example, {"type":"dynamic","factor":2.0}')
+        parser.add_argument('--rope-theta',
+                            default=None,
+                            type=float,
+                            help='RoPE theta. Use with `rope_scaling`. In '
+                            'some cases, changing the RoPE theta improves the '
+                            'performance of the scaled model.')
         parser.add_argument('--enforce-eager',
                             action='store_true',
                             help='Always use eager-mode PyTorch. If False, '
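As the help text above notes, the new flag is meant to be used alongside the existing --rope-scaling option. Below is a minimal sketch of exercising both through vLLM's offline LLM entrypoint, assuming LLM forwards these keyword arguments to EngineArgs the same way the CLI flags do; the model name and numeric values are illustrative, not taken from the commit.

# CLI equivalent (flags from this diff):
#   --rope-scaling '{"type":"dynamic","factor":2.0}' --rope-theta 1000000
from vllm import LLM

llm = LLM(
    model="meta-llama/Llama-2-7b-hf",  # illustrative model choice
    rope_scaling={"type": "dynamic", "factor": 2.0},  # example from the --rope-scaling help text
    rope_theta=1000000.0,  # the new knob added by this commit
)
outputs = llm.generate("Hello, my name is")
print(outputs[0].outputs[0].text)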
@@ -630,6 +637,7 @@ class EngineArgs:
             revision=self.revision,
             code_revision=self.code_revision,
             rope_scaling=self.rope_scaling,
+            rope_theta=self.rope_theta,
             tokenizer_revision=self.tokenizer_revision,
             max_model_len=self.max_model_len,
             quantization=self.quantization,
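For background on what the knob controls: in the standard RoPE formulation, theta is the base of the geometric sequence of rotation frequencies, inv_freq[i] = theta^(-2i/d). Raising theta slows the rotations, which is why it is often adjusted together with rope_scaling when extending context length. The snippet below illustrates that relationship using the textbook formula, not vLLM's internal implementation.

import torch

def rope_inv_freq(head_dim: int, theta: float) -> torch.Tensor:
    # Standard RoPE: inv_freq[i] = theta ** (-2i / head_dim).
    # A larger theta yields lower (slower-rotating) frequencies, so positions
    # remain distinguishable over longer contexts.
    exponents = torch.arange(0, head_dim, 2, dtype=torch.float32) / head_dim
    return 1.0 / (theta ** exponents)

print(rope_inv_freq(128, 10000.0)[:4])    # common default base
print(rope_inv_freq(128, 1000000.0)[:4])  # a raised base, as --rope-theta allows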