[MISC] Add lora requests to metrics (#9477)
Co-authored-by: Kunjan Patel <kunjanp_google_com@vllm.us-central1-a.c.kunjanp-gke-dev-2.internal>
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
import time
|
||||
from collections import Counter as collectionsCounter
|
||||
from collections import deque
|
||||
from contextlib import contextmanager
|
||||
from dataclasses import dataclass
|
||||
@@ -1617,6 +1618,25 @@ class LLMEngine:
|
||||
n_requests: List[int] = []
|
||||
finished_reason_requests: List[str] = []
|
||||
|
||||
# Lora requests
|
||||
running_lora_adapters = dict(
|
||||
collectionsCounter([
|
||||
running_request.lora_request.lora_name
|
||||
for scheduler in self.scheduler
|
||||
for running_request in scheduler.running
|
||||
if running_request.lora_request
|
||||
]))
|
||||
waiting_lora_adapters = dict(
|
||||
collectionsCounter([
|
||||
waiting_request.lora_request.lora_name
|
||||
for scheduler in self.scheduler
|
||||
for waiting_request in scheduler.waiting
|
||||
if waiting_request.lora_request
|
||||
]))
|
||||
max_lora_stat = "0"
|
||||
if self.lora_config:
|
||||
max_lora_stat = str(self.lora_config.max_loras)
|
||||
|
||||
# NOTE: This loop assumes prefill seq_groups are before
|
||||
# decode seq_groups in scheduled_seq_groups.
|
||||
if scheduler_outputs is not None:
|
||||
@@ -1738,7 +1758,9 @@ class LLMEngine:
|
||||
num_generation_tokens_requests=num_generation_tokens_requests,
|
||||
n_requests=n_requests,
|
||||
finished_reason_requests=finished_reason_requests,
|
||||
)
|
||||
max_lora=str(max_lora_stat),
|
||||
waiting_lora_adapters=list(waiting_lora_adapters.keys()),
|
||||
running_lora_adapters=list(running_lora_adapters.keys()))
|
||||
|
||||
# Forward `lora_request` to the model executor's `add_lora` and return
# whatever that call returns (annotated here as `bool`).
# NOTE(review): the meaning of the returned bool (presumably "adapter was
# added") is defined by the executor, not shown here -- confirm there.
def add_lora(self, lora_request: LoRARequest) -> bool:
|
||||
return self.model_executor.add_lora(lora_request)
|
||||
|
||||
Reference in New Issue
Block a user