Files
vllm-kimi25-eagle/Dockerfile
2026-04-14 09:51:51 +00:00

17 lines
896 B
Docker

# Stage 1 (throwaway): fetch and unpack the Eagle3 drafter model layers.
# Everything this stage installs or downloads (unzip, apt state, the zip
# archive itself) is discarded with the stage; only the extracted directory is
# carried into the runtime image below. Deleting the archive in a later layer
# of the same stage would NOT shrink the image, because the ADD layer would
# still contain it.
FROM vllm/vllm-openai:v0.19.0 AS eagle3-extract
RUN apt-get update && \
    apt-get install -y --no-install-recommends unzip && \
    rm -rf /var/lib/apt/lists/*
# NOTE(review): the archive is fetched without integrity verification. Once the
# expected digest is known, pin it with `ADD --checksum=sha256:<digest> <url> <dest>`.
ADD https://ewr1.vultrobjects.com/artifacts/models--nvidia--Kimi-K2.5-Thinking-Eagle3.zip /tmp/eagle3.zip
RUN unzip /tmp/eagle3.zip -d /opt/nvidia-Kimi-K2.5-Thinking-Eagle3

# Stage 2 (runtime): unmodified base image plus the extracted drafter model.
FROM vllm/vllm-openai:v0.19.0
COPY --from=eagle3-extract /opt/nvidia-Kimi-K2.5-Thinking-Eagle3 /opt/nvidia-Kimi-K2.5-Thinking-Eagle3
# Overlay patched modules onto the vLLM package shipped in the base image.
# VLLM_PKG_DIR is the installed vLLM package root; the default matches the
# Python 3.12 dist-packages layout these patches target. Override it with
# `--build-arg VLLM_PKG_DIR=...` if the base image installs vLLM elsewhere.
ARG VLLM_PKG_DIR=/usr/local/lib/python3.12/dist-packages/vllm
# Patch tool and reasoning parsers for Eagle
COPY kimi_k2_tool_parser.py ${VLLM_PKG_DIR}/tool_parsers/kimi_k2_tool_parser.py
COPY kimi_k2_reasoning_parser.py ${VLLM_PKG_DIR}/reasoning/kimi_k2_reasoning_parser.py
# Patch serving layer: flush reasoning -> content on finish_reason=length
COPY serving.py ${VLLM_PKG_DIR}/entrypoints/openai/chat_completion/serving.py