# Listing metadata captured with this file (not Dockerfile instructions): 17 lines, 896 B, Docker
# Base image shared by the fetch stage and the runtime stage. Kept in one ARG
# so the two stages cannot drift apart; override with
#   docker build --build-arg VLLM_IMAGE=<image>:<tag> .
ARG VLLM_IMAGE=vllm/vllm-openai:v0.19.0

# ---- Stage 1: download and extract Eagle3 drafter model layers ----
# This work happens in a throwaway stage so that neither the zip archive nor
# the unzip package ever becomes a layer of the final image. Deleting them in
# a later RUN of the same stage would not shrink the image, because the
# earlier layers would still carry the bytes.
FROM ${VLLM_IMAGE} AS eagle3

RUN apt-get update \
    && apt-get install -y --no-install-recommends unzip \
    && rm -rf /var/lib/apt/lists/*

# TODO(review): pin this artifact with `ADD --checksum=sha256:<digest>` once the
# digest of the published zip is known; the download is currently unverified.
ADD https://ewr1.vultrobjects.com/artifacts/models--nvidia--Kimi-K2.5-Thinking-Eagle3.zip /tmp/eagle3.zip

RUN unzip /tmp/eagle3.zip -d /opt/nvidia-Kimi-K2.5-Thinking-Eagle3

# ---- Stage 2: runtime image ----
FROM ${VLLM_IMAGE}

# Only the extracted model directory is carried over from the fetch stage.
COPY --from=eagle3 /opt/nvidia-Kimi-K2.5-Thinking-Eagle3 /opt/nvidia-Kimi-K2.5-Thinking-Eagle3

# Patch tool and reasoning parsers for Eagle
COPY kimi_k2_tool_parser.py /usr/local/lib/python3.12/dist-packages/vllm/tool_parsers/kimi_k2_tool_parser.py
COPY kimi_k2_reasoning_parser.py /usr/local/lib/python3.12/dist-packages/vllm/reasoning/kimi_k2_reasoning_parser.py

# Patch serving layer: flush reasoning→content on finish_reason=length
COPY serving.py /usr/local/lib/python3.12/dist-packages/vllm/entrypoints/openai/chat_completion/serving.py