[V1][Bugfix]: vllm v1 version metric num_gpu_blocks is None (#15755)

Signed-off-by: rongfu.leng <rongfu.leng@daocloud.io>
This commit is contained in:
rongfu.leng
2025-04-30 18:20:39 +08:00
committed by GitHub
parent 1534d389af
commit d803786731
4 changed files with 37 additions and 11 deletions

View File

@@ -1,4 +1,5 @@
# SPDX-License-Identifier: Apache-2.0
import json
import os
import queue
import signal
@@ -116,6 +117,7 @@ class EngineCore:
logger.info("Batch queue is enabled with size %d",
self.batch_queue_size)
self.batch_queue = queue.Queue(self.batch_queue_size)
self.vllm_config = vllm_config
def _initialize_kv_caches(
self, vllm_config: VllmConfig) -> tuple[int, int, KVCacheConfig]:
@@ -507,7 +509,12 @@ class EngineCoreProc(EngineCore):
bind=False) as socket:
# Send ready message to front-end once input socket is connected.
socket.send(b'READY')
message_dict = {
'type': 'READY',
'num_gpu_blocks': self.vllm_config.cache_config.num_gpu_blocks,
}
message = json.dumps(message_dict).encode('utf-8')
socket.send(message)
while True:
# (RequestType, RequestData)