[V1][Metrics] Add several request timing histograms (#12644)
Signed-off-by: Mark McLoughlin <markmc@redhat.com>
This commit is contained in:
@@ -5,8 +5,8 @@ from typing import TYPE_CHECKING, List, Optional, Union
|
||||
|
||||
from vllm.lora.request import LoRARequest
|
||||
from vllm.sampling_params import SamplingParams
|
||||
from vllm.sequence import RequestMetrics
|
||||
from vllm.v1.engine import EngineCoreRequest, FinishReason
|
||||
from vllm.v1.engine import (EngineCoreEvent, EngineCoreEventType,
|
||||
EngineCoreRequest, FinishReason)
|
||||
from vllm.v1.utils import ConstantList
|
||||
|
||||
if TYPE_CHECKING:
|
||||
@@ -33,14 +33,10 @@ class Request:
|
||||
self.sampling_params = sampling_params
|
||||
# Because of LoRA, the eos token id can be different for each request.
|
||||
self.eos_token_id = eos_token_id
|
||||
self.metrics = RequestMetrics(arrival_time=arrival_time,
|
||||
last_token_time=arrival_time,
|
||||
first_scheduled_time=None,
|
||||
first_token_time=None,
|
||||
time_in_queue=None)
|
||||
self.lora_request = lora_request
|
||||
|
||||
self.status = RequestStatus.WAITING
|
||||
self.events: List[EngineCoreEvent] = []
|
||||
self.stop_reason: Union[int, str, None] = None
|
||||
assert sampling_params.max_tokens is not None
|
||||
self.max_tokens = sampling_params.max_tokens
|
||||
@@ -83,6 +79,21 @@ class Request:
|
||||
lora_request=request.lora_request,
|
||||
)
|
||||
|
||||
def queued(self, timestamp: Optional[float] = None) -> None:
|
||||
self.events.append(
|
||||
EngineCoreEvent.new_event(EngineCoreEventType.QUEUED, timestamp))
|
||||
|
||||
def scheduled(self, timestamp: Optional[float] = None) -> None:
|
||||
self.events.append(
|
||||
EngineCoreEvent.new_event(EngineCoreEventType.SCHEDULED,
|
||||
timestamp))
|
||||
|
||||
def take_events(self) -> Optional[List[EngineCoreEvent]]:
|
||||
if not self.events:
|
||||
return None
|
||||
events, self.events = self.events, []
|
||||
return events
|
||||
|
||||
def append_output_token_ids(
|
||||
self,
|
||||
token_ids: Union[int, List[int]],
|
||||
|
||||
Reference in New Issue
Block a user