Construct KVTransferConfig properly from Python instead of using JSON blobs when not using the CLI (#17994)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Author: Harry Mellor
Date: 2025-05-12 19:25:33 +01:00
Committed by: GitHub
Parent: 98ea35601c
Commit: 72a3f6b898
5 changed files with 37 additions and 31 deletions
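
For context, a minimal sketch of the pattern this commit applies throughout the changed examples. The field names and values are taken from the diff below; the import path (`vllm.config`) is an assumption, since the hunks do not show the import lines.

    # Sketch of the before/after pattern; import path assumed, not shown in the diff.
    from vllm.config import KVTransferConfig

    # Old style: the config is smuggled in as a JSON string and parsed by a
    # CLI helper, so typos in keys or values surface only at parse time.
    ktc = KVTransferConfig.from_cli(
        '{"kv_connector":"LMCacheConnector","kv_role":"kv_producer",'
        '"kv_rank":0,"kv_parallel_size":2}')

    # New style: the same config built with keyword arguments, which keeps
    # the fields visible to linters, type checkers, and IDE completion.
    ktc = KVTransferConfig(kv_connector="LMCacheConnector",
                           kv_role="kv_producer",
                           kv_rank=0,
                           kv_parallel_size=2)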


@@ -49,9 +49,10 @@ def run_prefill(prefill_done, prompts):
     sampling_params = SamplingParams(temperature=0, top_p=0.95, max_tokens=1)
 
-    ktc = KVTransferConfig.from_cli(
-        '{"kv_connector":"LMCacheConnector","kv_role":"kv_producer","kv_rank":0,"kv_parallel_size":2}'
-    )
+    ktc = KVTransferConfig(kv_connector="LMCacheConnector",
+                           kv_role="kv_producer",
+                           kv_rank=0,
+                           kv_parallel_size=2)
 
     # Set GPU memory utilization to 0.8 for an A40 GPU with 40GB
     # memory. Reduce the value if your GPU has less memory.
     llm = LLM(model="mistralai/Mistral-7B-Instruct-v0.2",
@@ -78,9 +79,10 @@ def run_decode(prefill_done, prompts, timeout=1):
     sampling_params = SamplingParams(temperature=0, top_p=0.95, max_tokens=10)
 
-    ktc = KVTransferConfig.from_cli(
-        '{"kv_connector":"LMCacheConnector","kv_role":"kv_consumer","kv_rank":1,"kv_parallel_size":2}'
-    )
+    ktc = KVTransferConfig(kv_connector="LMCacheConnector",
+                           kv_role="kv_consumer",
+                           kv_rank=1,
+                           kv_parallel_size=2)
 
     # Set GPU memory utilization to 0.8 for an A40 GPU with 40GB
     # of memory. Reduce the value if your GPU has less memory.
     llm = LLM(model="mistralai/Mistral-7B-Instruct-v0.2",