diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py index 7651bf988..9001e3181 100644 --- a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py +++ b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py @@ -815,20 +815,12 @@ class NixlConnectorScheduler: # Only trigger 1 KV transfer per request. params["do_remote_prefill"] = False - def build_connector_meta( + def _build_save_meta( self, + meta: NixlConnectorMetadata, scheduler_output: SchedulerOutput, - ) -> KVConnectorMetadata: - meta = NixlConnectorMetadata() - - # Loop through scheduled reqs and convert to ReqMeta. - for req_id, (req, block_ids) in self._reqs_need_recv.items(): - assert req.kv_transfer_params is not None - meta.add_new_req_to_recv( - request_id=req_id, - local_block_ids=block_ids, - kv_transfer_params=req.kv_transfer_params, - ) + ) -> None: + # only called when use_host_buffer is True to build the save metadata # NOTE: For the prefill side, there might be a chance that an early added # request is a chunked prefill, so we need to check if new blocks are added @@ -858,6 +850,24 @@ class NixlConnectorScheduler: # Therefore, only pop if `not is_partial`. self._reqs_need_save.pop(req_id) + def build_connector_meta( + self, + scheduler_output: SchedulerOutput, + ) -> KVConnectorMetadata: + meta = NixlConnectorMetadata() + + # Loop through scheduled reqs and convert to ReqMeta. + for req_id, (req, block_ids) in self._reqs_need_recv.items(): + assert req.kv_transfer_params is not None + meta.add_new_req_to_recv( + request_id=req_id, + local_block_ids=block_ids, + kv_transfer_params=req.kv_transfer_params, + ) + + if self.use_host_buffer: + self._build_save_meta(meta, scheduler_output) + meta.reqs_to_send = self._reqs_need_send meta.reqs_in_batch = self._reqs_in_batch meta.reqs_not_processed = self._reqs_not_processed