[KV offload][4/N] Offloading KV connector (#22595)

Signed-off-by: Or Ozeri <oro@il.ibm.com>
This commit is contained in:
Or Ozeri
2025-09-19 22:07:17 +03:00
committed by GitHub
parent b716ab93a7
commit c59a0eca42
6 changed files with 1111 additions and 1 deletions

View File

@@ -176,6 +176,7 @@ def create_model_runner_output(
finished_sending: Optional[list[str]] = None,
finished_recving: Optional[list[str]] = None,
use_eos: bool = False,
token_id: int = 0,
) -> ModelRunnerOutput:
"""Make dummy model runner output for testing."""
@@ -184,7 +185,7 @@ def create_model_runner_output(
req_id_to_index = {req_id: idx for idx, req_id in enumerate(req_ids)}
# Make sampled tokens.
sampled_token = EOS_TOKEN_ID if use_eos else 0
sampled_token = EOS_TOKEN_ID if use_eos else token_id
sampled_token_ids = [[sampled_token] for _ in req_ids]
kv_connector_output = None if (