[Bugfix] Last token measurement fix (#11376)
Signed-off-by: rajveerb <46040700+rajveerb@users.noreply.github.com>
Co-authored-by: Roger Wang <136131678+ywang96@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
df04dffade
commit
b5cbe8eeb3
@@ -667,6 +667,7 @@ class SequenceGroup:
|
||||
first_scheduled_time=None,
|
||||
first_token_time=None,
|
||||
time_in_queue=None)
|
||||
self.last_token_latency = 0.0
|
||||
self.lora_request = lora_request
|
||||
self.prompt_logprobs: Optional[PromptLogprobs] = None
|
||||
self.state = SequenceGroupState()
|
||||
@@ -762,18 +763,21 @@ class SequenceGroup:
|
||||
assert num_lookahead_slots + 1 == num_scheduler_steps or is_prefill
|
||||
self.init_multi_step(num_steps=num_lookahead_slots + 1)
|
||||
|
||||
def get_last_latency(self, now: float) -> float:
|
||||
def set_last_token_time(self, now: float) -> None:
|
||||
"""Sets the last token time for Request level timings."""
|
||||
# If still in prefill phase, raise Error.
|
||||
if self.is_prefill():
|
||||
raise ValueError(
|
||||
"seq_group.get_last_latency() should not be called "
|
||||
"if the seq_group is in prefill phase.")
|
||||
|
||||
# Otherwise return token latency.
|
||||
latency = now - self.metrics.last_token_time
|
||||
# If still in prefill phase, assertion fails.
|
||||
assert not self.is_prefill(), (
|
||||
"seq_group.set_last_token_time() should not be called "
|
||||
"if the seq_group is in prefill phase.")
|
||||
self.last_token_latency = now - self.metrics.last_token_time
|
||||
self.metrics.last_token_time = now
|
||||
return latency
|
||||
|
||||
def get_last_token_latency(self) -> float:
    """Return the latency of the last token.

    This is a plain read of ``self.last_token_latency``; nothing is
    recomputed or updated here.

    Returns:
        The value currently stored in ``self.last_token_latency``.

    Raises:
        AssertionError: If ``self.is_prefill()`` reports that the
            sequence group is still in the prefill phase.
    """
    # Guard against use during prefill, where a per-token latency is not
    # meaningful for this group.
    assert not self.is_prefill(), (
        "seq_group.get_last_token_latency() should not be called "
        "if the seq_group is in prefill phase.")
    return self.last_token_latency
|
||||
|
||||
def maybe_set_first_token_time(self, time: float) -> None:
|
||||
"""Sets the first token time for Request level timings."""
|
||||
|
||||
Reference in New Issue
Block a user