Add NeuronxDistributedInference support, Speculative Decoding, Dynamic on-device sampling (#16357)
Signed-off-by: Satyajith Chilappagari <satchill@amazon.com>
Co-authored-by: Aaron Dou <yzdou@amazon.com>
Co-authored-by: Shashwat Srijan <sssrijan@amazon.com>
Co-authored-by: Chongming Ni <chongmni@amazon.com>
Co-authored-by: Amulya Ballakur <amulyaab@amazon.com>
Co-authored-by: Patrick Lange <patlange@amazon.com>
Co-authored-by: Elaine Zhao <elaineyz@amazon.com>
Co-authored-by: Lin Lin Pan <tailinpa@amazon.com>
Co-authored-by: Navyadhara Gogineni <navyadha@amazon.com>
Co-authored-by: Yishan McNabb <yishanm@amazon.com>
Co-authored-by: Mrinal Shukla <181322398+mrinalks@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
ba7703e659
commit
043e4c4955
@@ -2273,6 +2273,9 @@ class SpeculativeConfig:
|
||||
"""Scaling factor for entropy-based threshold, applied when using
|
||||
`TypicalAcceptanceSampler`."""
|
||||
|
||||
speculative_token_tree: Optional[str] = None
|
||||
"""Specifies the tree structure for speculative token generation.
|
||||
"""
|
||||
# required configuration params passed from engine
|
||||
target_model_config: ModelConfig = field(default=None,
|
||||
init=True) # type: ignore
|
||||
@@ -2447,10 +2450,11 @@ class SpeculativeConfig:
|
||||
"Chunked prefill and EAGLE are not compatible "
|
||||
"when using V0.")
|
||||
|
||||
from vllm.platforms import current_platform
|
||||
from vllm.transformers_utils.configs.eagle import (
|
||||
EAGLEConfig)
|
||||
if isinstance(self.draft_model_config.hf_config,
|
||||
EAGLEConfig):
|
||||
EAGLEConfig) or current_platform.is_neuron():
|
||||
pass
|
||||
else:
|
||||
eagle_config = EAGLEConfig(
|
||||
|
||||
Reference in New Issue
Block a user