diff --git a/vllm/Dockerfile b/vllm/Dockerfile
index c03a64c..732b4bc 100644
--- a/vllm/Dockerfile
+++ b/vllm/Dockerfile
@@ -88,7 +88,7 @@ RUN cd xformers && \
 
 FROM build-base AS build-flashinfer
 ARG FLASHINFER_ENABLE_AOT=1
-ARG FLASHINFER_REF=v0.4.0
+ARG FLASHINFER_REF=v0.4.1
 ARG FLASHINFER_BUILD_SUFFIX=cu130
 ENV FLASHINFER_LOCAL_VERSION=${FLASHINFER_BUILD_SUFFIX:-}
 RUN git clone https://github.com/flashinfer-ai/flashinfer.git
@@ -109,24 +109,24 @@ RUN git clone https://github.com/LMCache/LMCache.git -b ${LMCACHE_REF} && \
     cp dist/*.whl /wheels/
 
-# FROM build-base AS build-flash-attention
-# RUN apt-get update && apt-get install -y build-essential cmake gcc && \
-#     git clone --depth=1 https://github.com/Dao-AILab/flash-attention flash-attention && \
-#     cd flash-attention/hopper && \
-#     mkdir wheels && \
-#     export MAX_JOBS=8 && \
-#     export NVCC_THREADS=1 && \
-#     export CMAKE_BUILD_PARALLEL_LEVEL=$MAX_JOBS && \
-#     MAX_JOBS=$MAX_JOBS \
-#     CMAKE_BUILD_PARALLEL_LEVEL=$MAX_JOBS \
-#     FLASH_ATTENTION_FORCE_BUILD="TRUE" \
-#     FLASH_ATTENTION_FORCE_CXX11_ABI="FALSE" \
-#     FLASH_ATTENTION_SKIP_CUDA_BUILD="FALSE" \
-#     pip3 wheel . -v --no-deps -w ./wheels/ && \
-#     cp wheels/*.whl /wheels/
+FROM build-base AS build-flash-attention
+RUN apt-get update && apt-get install -y build-essential cmake gcc && \
+    git clone https://github.com/Dao-AILab/flash-attention flash-attention && \
+    cd flash-attention/hopper && \
+    mkdir wheels && \
+    export MAX_JOBS=8 && \
+    export NVCC_THREADS=1 && \
+    export CMAKE_BUILD_PARALLEL_LEVEL=$MAX_JOBS && \
+    MAX_JOBS=$MAX_JOBS \
+    CMAKE_BUILD_PARALLEL_LEVEL=$MAX_JOBS \
+    FLASH_ATTENTION_FORCE_BUILD="TRUE" \
+    FLASH_ATTENTION_FORCE_CXX11_ABI="FALSE" \
+    FLASH_ATTENTION_SKIP_CUDA_BUILD="FALSE" \
+    pip3 wheel . -v --no-deps -w ./wheels/ && \
+    cp wheels/*.whl /wheels/
 
 
 FROM build-base AS build-vllm
-ARG VLLM_REF=v0.11.1rc1
+ARG VLLM_REF=v0.11.1rc2
 # Install ccache for faster compilation
 RUN apt-get update && apt-get install -y ccache
 RUN git clone https://github.com/vllm-project/vllm.git
@@ -163,6 +163,7 @@ RUN git clone https://github.com/bytedance/InfiniStore && \
     cp dist/*.whl /wheels/
 
 FROM base AS vllm-openai
+COPY --from=build-flash-attention /wheels/* wheels/
 COPY --from=build-flashinfer /wheels/* wheels/
 COPY --from=build-triton /wheels/* wheels/
 COPY --from=build-vllm /wheels/* wheels/
diff --git a/vllm/README.md b/vllm/README.md
index 7b9d65a..a99c3b2 100644
--- a/vllm/README.md
+++ b/vllm/README.md
@@ -6,6 +6,6 @@ Hosted [here](https://hub.docker.com/repository/docker/rajesh550/gh200-vllm)
     docker login
     # Alternative
     # docker buildx build --platform linux/arm64 --memory=600g -t rajesh550/gh200-vllm:0.9.0.1 .
-    docker build --memory=450g --platform linux/arm64 -t rajesh550/gh200-vllm:0.11.1rc1 . 2>&1 | tee build.log
-    docker push rajesh550/gh200-vllm:0.11.1rc1
+    docker build --memory=450g --platform linux/arm64 -t rajesh550/gh200-vllm:0.11.1rc2 . 2>&1 | tee build.log
+    docker push rajesh550/gh200-vllm:0.11.1rc2
 ```
\ No newline at end of file