[Core] Prevent side-channel attacks via cache salting (#17045)
Signed-off-by: Marko Rosenmueller <5467316+dr75@users.noreply.github.com>
commit 77073c77bc (parent a7d5b016bd), committed via GitHub
@@ -14,6 +14,7 @@ from pydantic import (BaseModel, ConfigDict, Field, TypeAdapter,
                       ValidationInfo, field_validator, model_validator)
 from typing_extensions import TypeAlias
 
+from vllm import envs
 from vllm.entrypoints.chat_utils import ChatCompletionMessageParam
 from vllm.logger import init_logger
 from vllm.pooling_params import PoolingParams
@@ -408,6 +409,15 @@ class ChatCompletionRequest(OpenAIBaseModel):
         "If specified with 'logprobs', tokens are represented "
         " as strings of the form 'token_id:{token_id}' so that tokens "
         "that are not JSON-encodable can be identified."))
+    cache_salt: Optional[str] = Field(
+        default=None,
+        description=(
+            "If specified, the prefix cache will be salted with the provided "
+            "string to prevent an attacker to guess prompts in multi-user "
+            "environments. The salt should be random, protected from "
+            "access by 3rd parties, and long enough to be "
+            "unpredictable (e.g., 43 characters base64-encoded, corresponding "
+            "to 256 bit). Not supported by vLLM engine V0."))
 
     # doc: end-chat-completion-extra-params
 
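For context: with shared prefix caching, an attacker who can measure response latency may detect whether another tenant's prompt prefix is already cached; a salt that is unguessable across users makes cache keys disjoint. Below is a minimal client-side sketch, assuming a vLLM OpenAI-compatible server at a placeholder URL and a placeholder model name; secrets.token_urlsafe(32) emits exactly 43 URL-safe base64 characters (256 bits of entropy), matching the recommendation in the field description above.

import secrets

from openai import OpenAI

# Placeholder deployment details; adjust to your server.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

# 32 random bytes -> 43 URL-safe base64 characters = 256 bits of entropy.
cache_salt = secrets.token_urlsafe(32)

resp = client.chat.completions.create(
    model="meta-llama/Llama-3.1-8B-Instruct",  # placeholder model
    messages=[{"role": "user", "content": "Hello!"}],
    # `cache_salt` is a vLLM extension, so it is passed via `extra_body`
    # rather than as a named client argument.
    extra_body={"cache_salt": cache_salt},
)
print(resp.choices[0].message.content)

Requests that share a salt can still share cached prefixes with each other, so a tenant can reuse one salt across its own requests and keep the performance benefit of prefix caching.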
@@ -726,6 +736,20 @@ class ChatCompletionRequest(OpenAIBaseModel):
                 "`add_generation_prompt` to True.")
         return data
 
+    @model_validator(mode="before")
+    @classmethod
+    def check_cache_salt_support(cls, data):
+        if data.get("cache_salt") is not None:
+            if not envs.VLLM_USE_V1:
+                raise ValueError(
+                    "Parameter 'cache_salt' is not supported with "
+                    "this instance of vLLM, which uses engine V0.")
+            if not isinstance(data["cache_salt"],
+                              str) or not data["cache_salt"]:
+                raise ValueError("Parameter 'cache_salt' must be a "
+                                 "non-empty string if provided.")
+        return data
+
 
 class CompletionRequest(OpenAIBaseModel):
     # Ordered by official OpenAI API documentation
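The validator runs in pydantic's "before" mode, so `data` is the raw request dict and must be read with .get() rather than attribute access. A self-contained sketch of the same pattern, with a hypothetical USE_V1 flag standing in for envs.VLLM_USE_V1:

from typing import Optional

from pydantic import BaseModel, model_validator

USE_V1 = True  # hypothetical stand-in for envs.VLLM_USE_V1


class SaltedRequest(BaseModel):
    cache_salt: Optional[str] = None

    @model_validator(mode="before")
    @classmethod
    def check_cache_salt_support(cls, data):
        # mode="before": `data` is the raw input, not a validated model.
        if isinstance(data, dict) and data.get("cache_salt") is not None:
            if not USE_V1:
                raise ValueError("cache_salt is only supported on engine V1")
            if not isinstance(data["cache_salt"], str) or not data["cache_salt"]:
                raise ValueError("cache_salt must be a non-empty string")
        return data


SaltedRequest(cache_salt="u" * 43)  # accepted
# SaltedRequest(cache_salt="")      # rejected: empty string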
@@ -1622,9 +1646,9 @@ class TranscriptionRequest(OpenAIBaseModel):
 
     # doc: begin-transcription-extra-params
     stream: Optional[bool] = False
     """Custom field not present in the original OpenAI definition. When set,
     it will enable output to be streamed in a similar fashion as the Chat
     Completion endpoint.
     """
     # Flattened stream option to simplify form data.
     stream_include_usage: Optional[bool] = False
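The `stream` flag above is a vLLM extension to the transcription API. A rough client sketch, assuming the server exposes the OpenAI-compatible /v1/audio/transcriptions route and streams chat-completion-style SSE lines; the URL, model, and file path are placeholders:

import requests

with open("sample.wav", "rb") as audio:  # placeholder audio file
    resp = requests.post(
        "http://localhost:8000/v1/audio/transcriptions",  # placeholder URL
        files={"file": audio},
        # `stream` is the flattened form field added in the diff above.
        data={"model": "openai/whisper-large-v3", "stream": "true"},
        stream=True,
    )

# Assuming SSE framing like the Chat Completion endpoint:
# lines of the form "data: {...}", terminated by "data: [DONE]".
for line in resp.iter_lines():
    if line and line != b"data: [DONE]":
        print(line.decode())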
@@ -1642,7 +1666,7 @@ class TranscriptionRequest(OpenAIBaseModel):
     """
 
     top_p: Optional[float] = None
     """Enables nucleus (top-p) sampling, where tokens are selected from the
     smallest possible set whose cumulative probability exceeds `p`.
     """
@@ -1650,7 +1674,7 @@ class TranscriptionRequest(OpenAIBaseModel):
     """Limits sampling to the `k` most probable tokens at each step."""
 
     min_p: Optional[float] = None
     """Filters out tokens with a probability lower than `min_p`, ensuring a
     minimum likelihood threshold during sampling.
     """
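These docstrings describe standard logits filters. A compact numpy sketch of how top-k, top-p, and min-p each narrow the candidate set (illustrative only, not vLLM's implementation; it follows the docstrings' plain reading, and in particular treats `min_p` as an absolute threshold, whereas some implementations scale it by the top token's probability):

import numpy as np


def filter_probs(probs, top_p=None, top_k=None, min_p=None):
    """Zero out tokens excluded by top-k / top-p / min-p, then renormalize."""
    keep = np.ones_like(probs, dtype=bool)
    if top_k is not None:
        # Keep the k most probable tokens (ties may keep a few more).
        keep &= probs >= np.sort(probs)[-top_k]
    if top_p is not None:
        # Smallest prefix of the sorted distribution whose cumulative
        # probability reaches p.
        order = np.argsort(probs)[::-1]
        cutoff = int(np.searchsorted(np.cumsum(probs[order]), top_p)) + 1
        mask = np.zeros_like(keep)
        mask[order[:cutoff]] = True
        keep &= mask
    if min_p is not None:
        # Drop tokens below the minimum-probability threshold.
        keep &= probs >= min_p
    out = np.where(keep, probs, 0.0)
    return out / out.sum()


probs = np.array([0.5, 0.3, 0.1, 0.07, 0.03])
print(filter_probs(probs, top_p=0.9))   # keeps 0.5, 0.3, 0.1
print(filter_probs(probs, top_k=2))     # keeps 0.5, 0.3
print(filter_probs(probs, min_p=0.05))  # drops 0.03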