bump cache buster 30 for FP4 staging + DeepGEMM FP4 activations
This commit is contained in:
@@ -8,7 +8,7 @@ RUN apt-get update && apt-get install -y git screen cmake && rm -rf /var/lib/apt
|
||||
|
||||
# Clone and build DeepGEMM with NVFP4 mega_moe kernel
|
||||
# CACHE_BUSTER: increment to force fresh clone
|
||||
RUN git clone -b nvfp4-mega-moe https://sweetapi.com/biondizzle/DeepGEMM.git /root/DeepGEMM && CACHE_BUSTER=28
|
||||
RUN git clone -b nvfp4-mega-moe https://sweetapi.com/biondizzle/DeepGEMM.git /root/DeepGEMM && CACHE_BUSTER=30
|
||||
|
||||
# Build DeepGEMM with proper CUDA/NVRTC paths
|
||||
ENV CPATH="/usr/local/lib/python3.12/dist-packages/flashinfer/data/cutlass/include:/usr/local/lib/python3.12/dist-packages/nvidia/cu13/include:/usr/local/cuda-13.0/include:${CPATH}"
|
||||
@@ -19,7 +19,7 @@ RUN ln -sf /usr/local/lib/python3.12/dist-packages/nvidia/cu13/lib/libnvrtc.so.1
|
||||
RUN cd /root/DeepGEMM && python3 setup.py build_ext --inplace
|
||||
|
||||
# Bust cache for patch changes — ARG before COPY ensures layer invalidation
|
||||
ARG PATCH_CACHE_BUSTER=29
|
||||
ARG PATCH_CACHE_BUSTER=30
|
||||
# Copy our DeepSeek V4 patch over vLLM's model file
|
||||
COPY patches/deepseek_v4.py /usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/deepseek_v4.py
|
||||
|
||||
|
||||
Reference in New Issue
Block a user