[kv_offload+HMA][0/N]: Support block-level preemption handling (#34805)

Signed-off-by: Or Ozeri <oro@il.ibm.com>
Co-authored-by: Nicolò Lucchesi <nlucches@redhat.com>
This commit is contained in:
Or Ozeri
2026-03-18 08:49:53 +02:00
committed by GitHub
parent 86b7e3c95a
commit fcf0687b27
7 changed files with 27 additions and 25 deletions

View File

@@ -231,10 +231,11 @@ def test_multi_example_connector_consistency():
]
# First three events are from initialization (register_kv_caches,
# set_host_xfer_buffer_ops, get_handshake_metadata), then generate() events.
- assert events["storage1-WORKER"][:7] == [
+ assert events["storage1-WORKER"][:8] == [
"register_kv_caches",
"set_host_xfer_buffer_ops",
"get_handshake_metadata",
+ "handle_preemptions",
"bind_connector_metadata",
"start_load_kv",
"wait_for_layer_load",
@@ -246,10 +247,11 @@ def test_multi_example_connector_consistency():
"update_state_after_alloc num_blocks=[0] 0",
"build_connector_meta",
]
- assert events["storage2-WORKER"][:7] == [
+ assert events["storage2-WORKER"][:8] == [
"register_kv_caches",
"set_host_xfer_buffer_ops",
"get_handshake_metadata",
+ "handle_preemptions",
"bind_connector_metadata",
"start_load_kv",
"wait_for_layer_load",
@@ -399,8 +401,8 @@ def test_multi_connector_handle_preemptions_integration():
# testing the delegation behavior of MultiConnector here.
# The connector attribute contains the KV connector.
assert scheduler.connector is not None, "Scheduler should have a connector"
- preempted_req_ids = {"req-1", "req-2", "req-3"}
- scheduler.connector.handle_preemptions(preempted_req_ids)
+ connector_md = scheduler.connector.build_connector_meta(scheduler.schedule())
+ scheduler.connector.handle_preemptions(connector_md)
# Verify both connectors received the handle_preemptions call
events = get_connector_events()

View File

@@ -363,10 +363,7 @@ class RequestRunner:
assert kv_connector_metadata is not None
assert isinstance(kv_connector_metadata, OffloadingConnectorMetadata)
- if scheduler_output.preempted_req_ids:
- self.worker_connector.handle_preemptions(
- scheduler_output.preempted_req_ids
- )
+ self.worker_connector.handle_preemptions(kv_connector_metadata)
self.worker_connector.bind_connector_metadata(kv_connector_metadata)
self.worker_connector.start_load_kv(self._dummy_ctx)