diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml
index 39f7d4d66..a4c98f86e 100644
--- a/.buildkite/test-amd.yaml
+++ b/.buildkite/test-amd.yaml
@@ -2071,6 +2071,14 @@ steps:
   - pytest -v -s kernels/moe --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT
   parallelism: 2
 
+- label: Kernels FP8 MoE Test
+  timeout_in_minutes: 60
+  mirror_hardwares: [amdexperimental, amdproduction]
+  agent_pool: mi325_2
+  optional: true
+  commands:
+  - pytest -v -s kernels/moe/test_deepep_moe.py
+
 - label: Kernels Mamba Test # 31min
   timeout_in_minutes: 45
   mirror_hardwares: [amdexperimental, amdproduction]
@@ -3801,6 +3809,14 @@ steps:
   - pytest -v -s kernels/moe --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT
   parallelism: 2
 
+- label: Kernels FP8 MoE Test
+  timeout_in_minutes: 60
+  mirror_hardwares: [amdexperimental, amdproduction]
+  agent_pool: mi355_2
+  optional: true
+  commands:
+  - pytest -v -s kernels/moe/test_deepep_moe.py
+
 - label: Kernels Mamba Test # 31min
   timeout_in_minutes: 45
   mirror_hardwares: [amdexperimental, amdproduction]
diff --git a/docker/Dockerfile.rocm b/docker/Dockerfile.rocm
index 22226e8da..f8a4274a1 100644
--- a/docker/Dockerfile.rocm
+++ b/docker/Dockerfile.rocm
@@ -184,6 +184,34 @@ RUN cd /opt/rixl && mkdir -p /app/install && \
         --ucx-plugins-dir ${UCX_HOME}/lib/ucx \
         --nixl-plugins-dir ${RIXL_HOME}/lib/x86_64-linux-gnu/plugins
 
+# DeepEP build stage: builds rocSHMEM from source, then the DeepEP wheel.
+FROM base AS build_deep
+ARG ROCSHMEM_BRANCH="ba0bf0f3"
+ARG ROCSHMEM_REPO="https://github.com/ROCm/rocm-systems.git"
+ARG DEEPEP_BRANCH="e84464ec"
+ARG DEEPEP_REPO="https://github.com/ROCm/DeepEP.git"
+ARG DEEPEP_NIC="cx7"
+ENV ROCSHMEM_DIR=/opt/rocshmem
+
+RUN git clone ${ROCSHMEM_REPO} \
+    && cd rocm-systems \
+    && git checkout ${ROCSHMEM_BRANCH} \
+    && mkdir -p projects/rocshmem/build \
+    && cd projects/rocshmem/build \
+    && cmake .. \
+        -DCMAKE_INSTALL_PREFIX="${ROCSHMEM_DIR}" \
+        -DROCM_PATH=/opt/rocm \
+        -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
+        -DUSE_EXTERNAL_MPI=OFF \
+    && make -j"$(nproc)" \
+    && make install
+
+# Build the DeepEP wheel into /app/deep_install.
+# DeepEP looks for rocshmem at ROCSHMEM_DIR.
+RUN git clone ${DEEPEP_REPO} \
+    && cd DeepEP \
+    && git checkout ${DEEPEP_BRANCH} \
+    && python3 setup.py --variant rocm --nic ${DEEPEP_NIC} bdist_wheel --dist-dir=/app/deep_install
 
 # -----------------------
 # vLLM wheel release build stage (for building distributable wheels)
@@ -305,6 +333,11 @@ RUN --mount=type=bind,from=export_vllm,src=/,target=/install \
 RUN --mount=type=bind,from=build_rixl,src=/app/install,target=/rixl_install \
     uv pip install --system /rixl_install/*.whl
 
+# Install the DeepEP wheel and copy the rocSHMEM install tree from build_deep.
+RUN --mount=type=bind,from=build_deep,src=/app/deep_install,target=/deep_install \
+    uv pip install --system /deep_install/*.whl
+COPY --from=build_deep /opt/rocshmem /opt/rocshmem
+
 # RIXL/MoRIIO runtime dependencies (RDMA userspace libraries)
 RUN apt-get update -q -y && apt-get install -q -y \
     librdmacm1 \