[Speculative Decoding] MLPSpeculator Tensor Parallel support (1/2) (#6050)
Co-authored-by: Sirej Dua <sirej.dua@databricks.com>
Co-authored-by: Sirej Dua <Sirej Dua>
This commit is contained in:
@@ -957,12 +957,6 @@ class SpeculativeConfig:
|
||||
)
|
||||
|
||||
draft_hf_config = draft_model_config.hf_config
|
||||
if (draft_hf_config.model_type == "mlp_speculator"
|
||||
and target_parallel_config.world_size != 1):
|
||||
# MLPSpeculator TP support will be added very soon
|
||||
raise ValueError(
|
||||
"Speculative decoding with mlp_speculator models does not "
|
||||
"yet support distributed inferencing (TP > 1).")
|
||||
|
||||
if (num_speculative_tokens is not None
|
||||
and hasattr(draft_hf_config, "num_lookahead_tokens")):
|
||||
|
||||
Reference in New Issue
Block a user