[Bugfix] Miscalculated latency led to inaccurate time_to_first_token_seconds. (#6686)
This commit is contained in:
@@ -484,7 +484,7 @@ class SpecDecodeWorker(LoraNotSupportedWorkerBase):
|
||||
for both speculation cases (num_lookahead_slots>0) and non-speculation
|
||||
cases (e.g. prefill).
|
||||
|
||||
- Returns True iff there are remaining sequences to process.
|
||||
+ Returns True if there are remaining sequences to process.
|
||||
"""
|
||||
assert self.rank != self._driver_rank
|
||||
|
||||
|
||||
Reference in New Issue
Block a user