[Speculative decoding] Add periodic log with time spent in proposal/scoring/verification (#6963)

This commit is contained in:
Cade Daniel
2024-08-05 01:46:44 -07:00
committed by GitHub
parent c0d8f1636c
commit 82a1b1a82b
5 changed files with 125 additions and 35 deletions

View File

@@ -907,6 +907,7 @@ class SpeculativeConfig:
speculative_max_model_len: Optional[int],
enable_chunked_prefill: bool,
use_v2_block_manager: bool,
disable_log_stats: bool,
speculative_disable_by_batch_size: Optional[int],
ngram_prompt_lookup_max: Optional[int],
ngram_prompt_lookup_min: Optional[int],
@@ -1095,7 +1096,8 @@ class SpeculativeConfig:
typical_acceptance_sampler_posterior_threshold,
typical_acceptance_sampler_posterior_alpha=\
typical_acceptance_sampler_posterior_alpha,
-disable_logprobs=disable_logprobs
+disable_logprobs=disable_logprobs,
+disable_log_stats=disable_log_stats,
)
@staticmethod
@@ -1189,6 +1191,7 @@ class SpeculativeConfig:
typical_acceptance_sampler_posterior_threshold: float,
typical_acceptance_sampler_posterior_alpha: float,
disable_logprobs: bool,
disable_log_stats: bool,
):
"""Create a SpeculativeConfig object.
@@ -1221,6 +1224,8 @@ class SpeculativeConfig:
sampling, target sampling, and after accepted tokens are
determined. If set to False, log probabilities will be
returned.
disable_log_stats: Whether to disable periodic printing of stage
times in speculative decoding.
"""
self.draft_model_config = draft_model_config
self.draft_parallel_config = draft_parallel_config
@@ -1235,6 +1240,7 @@ class SpeculativeConfig:
self.typical_acceptance_sampler_posterior_alpha = \
typical_acceptance_sampler_posterior_alpha
self.disable_logprobs = disable_logprobs
self.disable_log_stats = disable_log_stats
self._verify_args()