From d64324e233f99dcc536e65c762f87cea554f7d14 Mon Sep 17 00:00:00 2001
From: Matthew Harris
Date: Wed, 6 May 2026 15:48:44 -0400
Subject: [PATCH] patch to fix MLA multi-group KV cache

---
Review notes (text in this section is ignored by `git am`):
- Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy. Verify the Jenkinsfile context/removed lines against the target tree
  (or apply with `git apply --ignore-whitespace`).
- The Dockerfile hunk was amended during review (apt cleanup in the same
  layer, explicit ca-certificates, trailing newline), so the post-image blob
  id on its `index` line is stale.

 Dockerfile  | 29 ++++++++++++++++++++++++++++-
 Jenkinsfile |  6 +++---
 2 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 07234ff..b58b3f5 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,3 +1,30 @@
 FROM vllm/vllm-openai-rocm:nightly
 
-RUN pip install av soundfile
\ No newline at end of file
+ENV MAX_JOBS=2
+
+# LMCache for KV cache offloading / sharing across nodes
+# Build HIP extensions for MI300X (gfx942)
+# CACHE_BUSTER is not read by anything in this build; bump its value to change
+# the RUN instruction text and force a fresh clone of the LMCache branch.
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends ca-certificates git && \
+    rm -rf /var/lib/apt/lists/* && \
+    git clone https://github.com/Byteflux/LMCache.git /tmp/lmcache && \
+    cd /tmp/lmcache && \
+    git checkout mla-multi-group-kv-cache-with-redis && \
+    pip install --no-cache-dir -r requirements/build.txt && \
+    BUILD_WITH_HIP=1 \
+    CXX=hipcc \
+    PYTORCH_ROCM_ARCH="gfx942" \
+    pip install --no-cache-dir --no-build-isolation . --verbose && \
+    rm -rf /tmp/lmcache && export CACHE_BUSTER=1
+
+# Nemotron reasoning parser
+COPY ./super_v3_reasoning_parser.py /opt/super_v3_reasoning_parser.py
+
+# DeepSeek tool call parser with MTP fixes
+COPY deepseekv32_tool_parser.py /usr/local/lib/python3.12/dist-packages/vllm/tool_parsers/deepseekv32_tool_parser.py
+
+# MiniMax tool call parser with kwargs fixes
+COPY minimax_tool_parser.py /usr/local/lib/python3.12/dist-packages/vllm/tool_parsers/minimax_tool_parser.py
+COPY minimax_m2_parser.py /usr/local/lib/python3.12/dist-packages/vllm/parser/minimax_m2_parser.py
diff --git a/Jenkinsfile b/Jenkinsfile
index 439582b..3de384e 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -7,10 +7,10 @@ pipeline {
     }
 
     parameters {
-        string(name: 'IMAGE_TAG', defaultValue: 'v0.19.0-cu130', description: 'Docker image tag')
+        string(name: 'IMAGE_TAG', defaultValue: 'rocm-mla-multi-group-kv-cache', description: 'Docker image tag')
         string(name: 'GIT_REPO', defaultValue: 'https://sweetapi.com/biondizzle/vllm-with-lmcache.git', description: 'Git repository URL (optional, uses workspace if empty)')
-        string(name: 'GIT_BRANCH', defaultValue: 'master', description: 'Git branch to build')
-        string(name: 'BASE_IMAGE', defaultValue: 'vllm/vllm-openai:v0.19.0-cu130', description: 'Base Docker image')
+        string(name: 'GIT_BRANCH', defaultValue: 'rocm-mla-multi-group-kv-cache', description: 'Git branch to build')
+        string(name: 'BASE_IMAGE', defaultValue: 'vllm/vllm-openai-rocm:nightly', description: 'Base Docker image')
     }
 
     stages {