[P/D] NIXL Integration (#17751)

Signed-off-by: ApostaC <yihua98@uchicago.edu> Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com> Signed-off-by: rshaw@neuralmagic.com <robertgshaw2@gmail.com> Signed-off-by: Robert Shaw <rshaw@neuralmagic.com> Signed-off-by: mgoin <mgoin64@gmail.com> Signed-off-by: Nick Hill <nhill@redhat.com> Signed-off-by: Brent Salisbury <bsalisbu@redhat.com> Co-authored-by: Tyler Michael Smith <tyler@neuralmagic.com> Co-authored-by: ApostaC <yihua98@uchicago.edu> Co-authored-by: Robert Shaw <rshaw@neuralmagic.com> Co-authored-by: mgoin <mgoin64@gmail.com> Co-authored-by: Nick Hill <nhill@redhat.com> Co-authored-by: Tyler Michael Smith <tysmith@redhat.com> Co-authored-by: Brent Salisbury <bsalisbu@redhat.com>
2025-05-12 12:46:16 -04:00
parent 05a4324f8e
commit d19110204c
34 changed files with 2723 additions and 108 deletions
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -8,6 +8,7 @@ import inspect
 import json
 import re
 import textwrap
+import uuid
 import warnings
 from collections import Counter
 from contextlib import contextmanager
@@ -3438,6 +3439,9 @@ class KVTransferConfig:
    """The KV connector for vLLM to transmit KV caches between vLLM instances.
    """

+    engine_id: str = str(uuid.uuid4())
+    """The engine id for KV transfers."""
+
    kv_buffer_device: Optional[str] = "cuda"
    """The device used by kv connector to buffer the KV cache.
    Currently only support 'cuda'."""
@@ -3448,7 +3452,7 @@ class KVTransferConfig:

    kv_role: Optional[KVRole] = None
    """Whether this vLLM instance produces, consumes KV cache, or both. Choices
-    are 'kv_producer', 'kv_consumer', and 'both'."""
+    are 'kv_producer', 'kv_consumer', and 'kv_both'."""

    kv_rank: Optional[int] = None
    """The rank of this vLLM instance in the KV cache transfer. Typical value: