[Bugfix] Fix the max_seq_len limit of 16384 for DeepSeek models (#20322)
Signed-off-by: Wang Huaqiang <huaqiang.wang@intel.com>
@@ -1442,10 +1442,17 @@ class ModelConfig:
         return getattr(self.hf_config, "matryoshka_dimensions", None)
 
     def get_and_verify_max_len(self, max_model_len: int):
-        tokenizer_config = try_get_tokenizer_config(
-            self.tokenizer,
-            trust_remote_code=self.trust_remote_code,
-            revision=self.tokenizer_revision)
+        # For pooling models, the tokenizer's `model_max_length` is often a
+        # reliable source for the maximum sequence length. However, for
+        # generative models, this can be incorrect and unduly limit the
+        # context window (e.g., DeepSeek-R1). Therefore, we only consider
+        # tokenizer_config for pooling models.
+        tokenizer_config = None
+        if self.runner_type == "pooling":
+            tokenizer_config = try_get_tokenizer_config(
+                self.tokenizer,
+                trust_remote_code=self.trust_remote_code,
+                revision=self.tokenizer_revision)
         max_model_len = _get_and_verify_max_len(
             hf_config=self.hf_text_config,
             tokenizer_config=tokenizer_config,
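
To make the effect of the gating concrete, below is a minimal, self-contained sketch. It is an assumed simplification, not vLLM's actual _get_and_verify_max_len; the function name and signature are hypothetical, and the DeepSeek-R1 numbers (max_position_embeddings=163840 in config.json, model_max_length=16384 in tokenizer_config.json) are assumptions consistent with the 16384 limit named in the commit title.

    # Sketch only: hypothetical stand-in for the max-length resolution,
    # showing why gating tokenizer_config on runner_type lifts the cap.
    from typing import Optional


    def resolve_max_len(max_position_embeddings: int,
                        tokenizer_model_max_length: Optional[int]) -> int:
        # When a tokenizer config is supplied, its model_max_length caps
        # the derived limit; otherwise the HF config value stands alone.
        if tokenizer_model_max_length is not None:
            return min(max_position_embeddings, tokenizer_model_max_length)
        return max_position_embeddings


    # Before the fix: tokenizer_config was always consulted, so a
    # generative model like DeepSeek-R1 was capped at 16384.
    assert resolve_max_len(163840, 16384) == 16384

    # After the fix: tokenizer_config is None unless runner_type is
    # "pooling", so generative models keep the full context window.
    runner_type = "generate"
    tokenizer_limit = 16384 if runner_type == "pooling" else None
    assert resolve_max_len(163840, tokenizer_limit) == 163840

For pooling models (runner_type == "pooling") the old behavior is preserved, since the tokenizer's model_max_length is a meaningful bound there.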