From d64324e233f99dcc536e65c762f87cea554f7d14 Mon Sep 17 00:00:00 2001
From: Matthew Harris <byte@byteflux.net>
Date: Wed, 6 May 2026 15:48:44 -0400
Subject: [PATCH] patch to fix MLA multi-group KV cache

---
 Dockerfile  | 25 ++++++++++++++++++++++++-
 Jenkinsfile |  6 +++---
 2 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 07234ff..b58b3f5 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,3 +1,26 @@
 FROM vllm/vllm-openai-rocm:nightly
 
-RUN pip install av soundfile
\ No newline at end of file
+# MAX_JOBS presumably caps parallel compile jobs for the build below; as ENV it also persists at runtime
+ENV MAX_JOBS=2
+
+# LMCache for KV cache offloading / sharing across nodes
+# Build HIP extensions for MI300X (gfx942); apt lists and the clone are removed in this same layer
+RUN apt-get update && apt-get install -y --no-install-recommends ca-certificates git && \
+    rm -rf /var/lib/apt/lists/* && \
+    git clone https://github.com/Byteflux/LMCache.git /tmp/lmcache && \
+    cd /tmp/lmcache && \
+    git checkout mla-multi-group-kv-cache-with-redis && \
+    pip install --no-cache-dir -r requirements/build.txt && \
+    BUILD_WITH_HIP=1 CXX=hipcc PYTORCH_ROCM_ARCH="gfx942" \
+    pip install --no-cache-dir --no-build-isolation . --verbose && \
+    rm -rf /tmp/lmcache
+
+# Reasoning parser for Nemotron, shipped as a standalone file under /opt
+COPY super_v3_reasoning_parser.py /opt/super_v3_reasoning_parser.py
+
+# Tool call parser for DeepSeek (includes MTP fixes), placed in vLLM's tool_parsers package
+COPY deepseekv32_tool_parser.py /usr/local/lib/python3.12/dist-packages/vllm/tool_parsers/deepseekv32_tool_parser.py
+
+# Tool call parsers for MiniMax (include kwargs fixes)
+COPY minimax_tool_parser.py /usr/local/lib/python3.12/dist-packages/vllm/tool_parsers/minimax_tool_parser.py
+COPY minimax_m2_parser.py /usr/local/lib/python3.12/dist-packages/vllm/parser/minimax_m2_parser.py
\ No newline at end of file
diff --git a/Jenkinsfile b/Jenkinsfile
index 439582b..3de384e 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -7,10 +7,10 @@ pipeline {
     }
 
     parameters {
-        string(name: 'IMAGE_TAG', defaultValue: 'v0.19.0-cu130', description: 'Docker image tag')
+        string(name: 'IMAGE_TAG', defaultValue: 'rocm-mla-multi-group-kv-cache', description: 'Docker image tag')
         string(name: 'GIT_REPO', defaultValue: 'https://sweetapi.com/biondizzle/vllm-with-lmcache.git', description: 'Git repository URL (optional, uses workspace if empty)')
-        string(name: 'GIT_BRANCH', defaultValue: 'master', description: 'Git branch to build')
-        string(name: 'BASE_IMAGE', defaultValue: 'vllm/vllm-openai:v0.19.0-cu130', description: 'Base Docker image')
+        string(name: 'GIT_BRANCH', defaultValue: 'rocm-mla-multi-group-kv-cache', description: 'Git branch to build')
+        string(name: 'BASE_IMAGE', defaultValue: 'vllm/vllm-openai-rocm:nightly', description: 'Base Docker image')
     }
 
     stages {