[Core] Prevent side-channel attacks via cache salting (#17045)

Signed-off-by: Marko Rosenmueller <5467316+dr75@users.noreply.github.com>
This commit is contained in:
Marko Rosenmueller
2025-04-30 14:27:21 +02:00
committed by GitHub
parent a7d5b016bd
commit 77073c77bc
18 changed files with 328 additions and 126 deletions

View File

@@ -29,6 +29,7 @@ class Request:
arrival_time: float,
lora_request: Optional["LoRARequest"] = None,
structured_output_request: Optional["StructuredOutputRequest"] = None,
cache_salt: Optional[str] = None,
) -> None:
self.request_id = request_id
self.sampling_params = sampling_params
@@ -51,6 +52,7 @@ class Request:
self._all_token_ids: list[int] = self.prompt_token_ids.copy()
self.spec_token_ids: list[int] = []
self.num_computed_tokens = 0
self.cache_salt: Optional[str] = cache_salt
# Multi-modal related
self.mm_positions = multi_modal_placeholders or []
@@ -89,6 +91,7 @@ class Request:
lora_request=request.lora_request,
structured_output_request=StructuredOutputRequest(
sampling_params=request.sampling_params),
cache_salt=request.cache_salt,
)
def append_output_token_ids(