diff --git a/Dockerfile b/Dockerfile index 6eda6d7..2f32a5b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -22,6 +22,8 @@ RUN cd /root/DeepGEMM && python3 setup.py build_ext --inplace && PATCH_CACHE_BUS ARG PATCH_CACHE_BUSTER=69 # Copy our DeepSeek V4 patch over vLLM's model file COPY patches/deepseek_v4.py /usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/deepseek_v4.py +# Copy the NVFP4 staging kernel (BF16→E2M1+UE4M3 quantization for activations) +COPY patches/staging_kernel.py /usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/staging_kernel.py # Verify everything imports RUN python3 -c "import deep_gemm; print('DeepGEMM NVFP4 OK')" && \