[Core] Prevent side-channel attacks via cache salting (#17045)

Signed-off-by: Marko Rosenmueller <5467316+dr75@users.noreply.github.com>
Author: Marko Rosenmueller
Committed: 2025-04-30 14:27:21 +02:00 (via GitHub)
Parent: a7d5b016bd
Commit: 77073c77bc
18 changed files with 328 additions and 126 deletions


@@ -14,6 +14,7 @@ from pydantic import (BaseModel, ConfigDict, Field, TypeAdapter,
ValidationInfo, field_validator, model_validator)
from typing_extensions import TypeAlias
from vllm import envs
from vllm.entrypoints.chat_utils import ChatCompletionMessageParam
from vllm.logger import init_logger
from vllm.pooling_params import PoolingParams
@@ -408,6 +409,15 @@ class ChatCompletionRequest(OpenAIBaseModel):
"If specified with 'logprobs', tokens are represented "
" as strings of the form 'token_id:{token_id}' so that tokens "
"that are not JSON-encodable can be identified."))
    cache_salt: Optional[str] = Field(
        default=None,
        description=(
            "If specified, the prefix cache will be salted with the provided "
            "string to prevent an attacker from guessing prompts in "
            "multi-user environments. The salt should be random, protected "
            "from access by 3rd parties, and long enough to be unpredictable "
            "(e.g., 43 characters base64-encoded, corresponding to 256 bits). "
            "Not supported by vLLM engine V0."))
    # doc: end-chat-completion-extra-params
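
Note (added for illustration, not part of the diff): a minimal client-side sketch of generating a suitable salt and passing it with a chat completion request. The base URL, API key, and model name are assumptions, and the `openai` client's `extra_body` is just one way to send the field; any HTTP client works.

```python
import base64
import os

from openai import OpenAI  # assumed client; a plain POST to /v1/chat/completions also works

# 32 random bytes = 256 bits; unpadded base64 yields 43 characters.
cache_salt = base64.urlsafe_b64encode(os.urandom(32)).decode().rstrip("=")

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")  # assumed local server
response = client.chat.completions.create(
    model="meta-llama/Llama-3.1-8B-Instruct",  # hypothetical model name
    messages=[{"role": "user", "content": "Hello!"}],
    extra_body={"cache_salt": cache_salt},  # keep the salt per user/tenant and secret
)
```

Reusing the same salt within one user or tenant preserves prefix-cache hits for that tenant while isolating its cache entries from other tenants.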
@@ -726,6 +736,20 @@ class ChatCompletionRequest(OpenAIBaseModel):
"`add_generation_prompt` to True.")
return data
    @model_validator(mode="before")
    @classmethod
    def check_cache_salt_support(cls, data):
        if data.get("cache_salt") is not None:
            if not envs.VLLM_USE_V1:
                raise ValueError(
                    "Parameter 'cache_salt' is not supported with "
                    "this instance of vLLM, which uses engine V0.")
            if not isinstance(data["cache_salt"],
                              str) or not data["cache_salt"]:
                raise ValueError("Parameter 'cache_salt' must be a "
                                 "non-empty string if provided.")
        return data
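
For illustration only (assumed environment: vLLM installed, pydantic v2): a sketch of how the validator above surfaces as a request-level validation error. Which message appears depends on whether the instance runs the V1 engine; an empty salt is rejected either way.

```python
from pydantic import ValidationError
from vllm.entrypoints.openai.protocol import ChatCompletionRequest

try:
    ChatCompletionRequest(
        model="dummy-model",  # hypothetical model name
        messages=[{"role": "user", "content": "hi"}],
        cache_salt="",  # empty string is rejected; on engine V0 any salt is rejected
    )
except ValidationError as err:
    print(err)
```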
class CompletionRequest(OpenAIBaseModel):
# Ordered by official OpenAI API documentation
@@ -1622,9 +1646,9 @@ class TranscriptionRequest(OpenAIBaseModel):
    # doc: begin-transcription-extra-params
    stream: Optional[bool] = False
    """Custom field not present in the original OpenAI definition. When set,
    it will enable output to be streamed in a similar fashion as the Chat
    Completion endpoint.
    """
    # Flattened stream option to simplify form data.
    stream_include_usage: Optional[bool] = False
@@ -1642,7 +1666,7 @@ class TranscriptionRequest(OpenAIBaseModel):
"""
top_p: Optional[float] = None
"""Enables nucleus (top-p) sampling, where tokens are selected from the
"""Enables nucleus (top-p) sampling, where tokens are selected from the
smallest possible set whose cumulative probability exceeds `p`.
"""
@@ -1650,7 +1674,7 @@ class TranscriptionRequest(OpenAIBaseModel):
"""Limits sampling to the `k` most probable tokens at each step."""
min_p: Optional[float] = None
"""Filters out tokens with a probability lower than `min_p`, ensuring a
"""Filters out tokens with a probability lower than `min_p`, ensuring a
minimum likelihood threshold during sampling.
"""