patch to fix MLA multi-group KV cache
This commit is contained in:
25
Dockerfile
25
Dockerfile
@@ -1,3 +1,26 @@
|
||||
FROM vllm/vllm-openai-rocm:nightly
|
||||
|
||||
RUN pip install av soundfile
|
||||
ENV MAX_JOBS=2
|
||||
|
||||
# LMCache for KV cache offloading / sharing across nodes
|
||||
# Build HIP extensions for MI300X (gfx942)
|
||||
RUN apt-get update && apt-get install -y git && \
|
||||
git clone https://github.com/Byteflux/LMCache.git /tmp/lmcache && \
|
||||
cd /tmp/lmcache && \
|
||||
git checkout mla-multi-group-kv-cache-with-redis && \
|
||||
pip install --no-cache-dir -r requirements/build.txt && \
|
||||
BUILD_WITH_HIP=1 \
|
||||
CXX=hipcc \
|
||||
PYTORCH_ROCM_ARCH="gfx942" \
|
||||
pip install --no-cache-dir --no-build-isolation . --verbose && \
|
||||
rm -rf /tmp/lmcache && export CACHE_BUSTER=1
|
||||
|
||||
# Nemotron reasoning parser
|
||||
COPY ./super_v3_reasoning_parser.py /opt/super_v3_reasoning_parser.py
|
||||
|
||||
# DeepSeek tool call parser with MTP fixes
|
||||
COPY deepseekv32_tool_parser.py /usr/local/lib/python3.12/dist-packages/vllm/tool_parsers/deepseekv32_tool_parser.py
|
||||
|
||||
# MiniMax tool call parser with kwargs fixes
|
||||
COPY minimax_tool_parser.py /usr/local/lib/python3.12/dist-packages/vllm/tool_parsers/minimax_tool_parser.py
|
||||
COPY minimax_m2_parser.py /usr/local/lib/python3.12/dist-packages/vllm/parser/minimax_m2_parser.py
|
||||
6
Jenkinsfile
vendored
6
Jenkinsfile
vendored
@@ -7,10 +7,10 @@ pipeline {
|
||||
}
|
||||
|
||||
parameters {
|
||||
string(name: 'IMAGE_TAG', defaultValue: 'v0.19.0-cu130', description: 'Docker image tag')
|
||||
string(name: 'IMAGE_TAG', defaultValue: 'rocm-mla-multi-group-kv-cache', description: 'Docker image tag')
|
||||
string(name: 'GIT_REPO', defaultValue: 'https://sweetapi.com/biondizzle/vllm-with-lmcache.git', description: 'Git repository URL (optional, uses workspace if empty)')
|
||||
string(name: 'GIT_BRANCH', defaultValue: 'master', description: 'Git branch to build')
|
||||
string(name: 'BASE_IMAGE', defaultValue: 'vllm/vllm-openai:v0.19.0-cu130', description: 'Base Docker image')
|
||||
string(name: 'GIT_BRANCH', defaultValue: 'rocm-mla-multi-group-kv-cache', description: 'Git branch to build')
|
||||
string(name: 'BASE_IMAGE', defaultValue: 'vllm/vllm-openai-rocm:nightly', description: 'Base Docker image')
|
||||
}
|
||||
|
||||
stages {
|
||||
|
||||
Reference in New Issue
Block a user