[Misc] Remove deprecated arg for cuda graph capture (#9864)
Signed-off-by: Roger Wang <ywang@roblox.com>
vllm/config.py
@@ -84,9 +84,6 @@ class ModelConfig:
             disable CUDA graph and always execute the model in eager mode.
             If False, we will use CUDA graph and eager execution in hybrid.
             If None, the user did not specify, so default to False.
-        max_context_len_to_capture: Maximum context len covered by CUDA graphs.
-            When a sequence has context length larger than this, we fall back
-            to eager mode (DEPRECATED. Use max_seq_len_to_capture instead).
         max_seq_len_to_capture: Maximum sequence len covered by CUDA graphs.
             When a sequence has context length larger than this, we fall back
             to eager mode. Additionally for encoder-decoder models, if the
@@ -147,7 +144,6 @@ class ModelConfig:
         quantization: Optional[str] = None,
         quantization_param_path: Optional[str] = None,
         enforce_eager: Optional[bool] = None,
-        max_context_len_to_capture: Optional[int] = None,
         max_seq_len_to_capture: Optional[int] = None,
         max_logprobs: int = 20,
         disable_sliding_window: bool = False,
@@ -181,9 +177,6 @@ class ModelConfig:
         self.quantization = quantization
         self.quantization_param_path = quantization_param_path
         self.enforce_eager = enforce_eager
-        if max_context_len_to_capture is not None:
-            raise ValueError("`max_context_len_to_capture` is deprecated. "
-                             "Use `max_seq_len_to_capture` instead.")
         self.max_seq_len_to_capture = max_seq_len_to_capture
         self.max_logprobs = max_logprobs
         self.disable_sliding_window = disable_sliding_window
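
Migration note: before this commit, passing the deprecated keyword raised the ValueError shown above; after it, the parameter no longer exists, so passing it fails at call time. A minimal before/after sketch against the vllm.LLM entrypoint, which forwards this keyword down to ModelConfig (the model name and capture length here are illustrative, not taken from this commit):

    from vllm import LLM

    # Before: the deprecated keyword was still accepted by the signature
    # (and raised ValueError when set). After this commit it is gone, so
    # the following now fails instead:
    #   llm = LLM(model="facebook/opt-125m", max_context_len_to_capture=8192)

    # After: bound CUDA graph capture by sequence length instead. Sequences
    # longer than this value fall back to eager-mode execution.
    llm = LLM(model="facebook/opt-125m", max_seq_len_to_capture=8192)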