fix: add missing staging_kernel.py to Dockerfile — the BF16→E2M1+UE4M3 quantization kernel was never copied into the container image

2026-05-12 21:21:24 +00:00
parent 91d7d9bad7
commit d88ea9842b
1 changed file with 2 additions and 0 deletions
--- a/2
+++ b/2
@@ -22,6 +22,8 @@ RUN cd /root/DeepGEMM && python3 setup.py build_ext --inplace && PATCH_CACHE_BUS
 ARG PATCH_CACHE_BUSTER=69
 # Copy our DeepSeek V4 patch over vLLM's model file
 COPY patches/deepseek_v4.py /usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/deepseek_v4.py
+# Copy the NVFP4 staging kernel (BF16→E2M1+UE4M3 quantization for activations)
+COPY patches/staging_kernel.py /usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/staging_kernel.py

 # Verify everything imports
 RUN python3 -c "import deep_gemm; print('DeepGEMM NVFP4 OK')" && \