[Model][Spec Decode] Nemotron-H MTP and Mamba Speculative Decoding Support (#33726)
Signed-off-by: Shahar Mor <smor@nvidia.com>
Signed-off-by: Benjamin Chislett <bchislett@nvidia.com>
Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
Co-authored-by: Shahar Mor <smor@nvidia.com>
Co-authored-by: Roi Koren <roik@nvidia.com>
Co-authored-by: Lucas Wilkinson <lwilkins@redhat.com>
This commit is contained in:
committed by
GitHub
parent
a9e15e040d
commit
f5972a872f
@@ -395,6 +395,15 @@ class VllmConfig:
|
||||
]
|
||||
return hash_str
|
||||
|
||||
@property
def num_speculative_tokens(self) -> int:
    """Return the configured number of speculative tokens.

    Falls back to 0 when no speculative config is attached to this
    object, or when that config leaves ``num_speculative_tokens`` as
    ``None``.
    """
    spec_config = self.speculative_config
    if spec_config is None:
        return 0
    token_count = spec_config.num_speculative_tokens
    return 0 if token_count is None else token_count
|
||||
|
||||
@property
|
||||
def needs_dp_coordinator(self) -> bool:
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user