[Chore] Remove more V0 dead code from sequence.py (#31783)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -13,7 +13,6 @@ from vllm.logger import init_logger
|
||||
from vllm.logprobs import PromptLogprobs, SampleLogprobs
|
||||
from vllm.lora.request import LoRARequest
|
||||
from vllm.multimodal.inputs import MultiModalPlaceholderDict
|
||||
-from vllm.sequence import RequestMetrics
|
||||
from vllm.v1.metrics.stats import RequestStateStats
|
||||
|
||||
logger = init_logger(__name__)
|
||||
@@ -113,7 +112,7 @@ class RequestOutput:
|
||||
prompt_logprobs: PromptLogprobs | None,
|
||||
outputs: list[CompletionOutput],
|
||||
finished: bool,
|
||||
-metrics: RequestMetrics | RequestStateStats | None = None,
|
||||
+metrics: RequestStateStats | None = None,
|
||||
lora_request: LoRARequest | None = None,
|
||||
encoder_prompt: str | None = None,
|
||||
encoder_prompt_token_ids: list[int] | None = None,
|
||||
|
||||
@@ -12,40 +12,6 @@ if TYPE_CHECKING:
|
||||
else:
|
||||
KVConnectorOutput = Any
|
||||
|
||||
-VLLM_TOKEN_ID_ARRAY_TYPE = "l"
-
-VLLM_INVALID_TOKEN_ID = -1
-
-
-@dataclass
-class RequestMetrics:
-    """Metrics associated with a request.
-
-    Attributes:
-        arrival_time: The time when the request arrived.
-        first_scheduled_time: The time when the request was first scheduled.
-        first_token_time: The time when the first token was generated.
-        time_in_queue: The time the request spent in the queue.
-        finished_time: The time when the request was finished.
-        scheduler_time: The time spent in the scheduler when this request was
-                        being considered by the scheduler.
-        model_forward_time: The time spent in the model forward pass when this
-                            request was in the batch.
-        model_execute_time: The time spent in the model execute function. This
-                            will include model forward, block/sync across
-                            workers, cpu-gpu sync time and sampling time.
-    """
-
-    arrival_time: float
-    last_token_time: float
-    first_scheduled_time: float | None
-    first_token_time: float | None
-    time_in_queue: float | None
-    finished_time: float | None = None
-    scheduler_time: float | None = None
-    model_forward_time: float | None = None
-    model_execute_time: float | None = None
-
|
||||
# cannot use msgspec.Struct here because Dynamo does not support it
|
||||
@dataclass
|
||||
|
||||
Reference in New Issue
Block a user