diff --git a/cmake/cpu_extension.cmake b/cmake/cpu_extension.cmake
index c9813a73d..5a0980dcc 100644
--- a/cmake/cpu_extension.cmake
+++ b/cmake/cpu_extension.cmake
@@ -18,6 +18,7 @@ set(ENABLE_AVX512 $ENV{VLLM_CPU_AVX512})
 set(ENABLE_AVX512BF16 $ENV{VLLM_CPU_AVX512BF16})
 set(ENABLE_AVX512VNNI $ENV{VLLM_CPU_AVX512VNNI})
 set(ENABLE_AMXBF16 $ENV{VLLM_CPU_AMXBF16})
+set(ENABLE_ARM_BF16 $ENV{VLLM_CPU_ARM_BF16})  # env override for ARM BF16, read at configure time
 
 include_directories("${CMAKE_SOURCE_DIR}/csrc")
 
@@ -115,6 +116,10 @@ else()
         set(AVX512_FOUND ON)
         message(STATUS "AVX512 support enabled via VLLM_CPU_AVX512 environment variable")
     endif()
+    if (ENABLE_ARM_BF16)  # truthy VLLM_CPU_ARM_BF16 (e.g. 1/ON/true) requests ARM BF16 explicitly
+        set(ARM_BF16_FOUND ON)  # mark ARM BF16 as available for the rest of this file
+        message(STATUS "ARM BF16 support enabled via VLLM_CPU_ARM_BF16 environment variable")
+    endif()
 endif()
 
 if (AVX512_FOUND AND NOT AVX512_DISABLED)
diff --git a/docker/Dockerfile.cpu b/docker/Dockerfile.cpu
index 063d3e6e4..d81957e02 100644
--- a/docker/Dockerfile.cpu
+++ b/docker/Dockerfile.cpu
@@ -20,6 +20,7 @@
 #   VLLM_CPU_AVX512BF16=false (default)|true (for cross-compilation)
 #   VLLM_CPU_AVX512VNNI=false (default)|true (for cross-compilation)
 #   VLLM_CPU_AMXBF16=false (default)|true (for cross-compilation)
+#   VLLM_CPU_ARM_BF16=false (default)|true (for cross-compilation)
 #
 
 ######################### COMMON BASE IMAGE #########################
@@ -108,9 +109,22 @@ ENV VLLM_CPU_AVX512VNNI=${VLLM_CPU_AVX512VNNI}
 # Support for building with AMXBF16 ISA: docker build --build-arg VLLM_CPU_AMXBF16="true" ...
 ARG VLLM_CPU_AMXBF16=1
 ENV VLLM_CPU_AMXBF16=${VLLM_CPU_AMXBF16}
+# Support for cross-compilation with ARM BF16 ISA: docker build --build-arg VLLM_CPU_ARM_BF16="true" ...
+ARG VLLM_CPU_ARM_BF16=0
+ENV VLLM_CPU_ARM_BF16=${VLLM_CPU_ARM_BF16}
 
 WORKDIR /vllm-workspace
 
+# Validate build arguments - prevent mixing incompatible ISA flags.
+# A flag is "off" when empty, 0, false, off or no (any case); docs use true/false, ARG defaults use 0/1.
+RUN is_on() { case "$1" in ""|0|[Ff][Aa][Ll][Ss][Ee]|[Oo][Ff][Ff]|[Nn][Oo]) return 1 ;; esac; return 0; }; \
+    if [ "$TARGETARCH" = "arm64" ] && { is_on "$VLLM_CPU_AVX2" || is_on "$VLLM_CPU_AVX512" || is_on "$VLLM_CPU_AVX512BF16" || is_on "$VLLM_CPU_AVX512VNNI"; }; then \
+        echo "ERROR: Cannot use x86-specific ISA flags (AVX2, AVX512, etc.) when building for ARM64 (--platform=linux/arm64)"; exit 1; \
+    fi; \
+    if [ "$TARGETARCH" = "amd64" ] && is_on "$VLLM_CPU_ARM_BF16"; then \
+        echo "ERROR: Cannot use ARM-specific ISA flags (ARM_BF16) when building for x86_64 (--platform=linux/amd64)"; exit 1; \
+    fi
+
 # Copy build requirements
 COPY requirements/cpu-build.txt requirements/build.txt
 
@@ -224,6 +238,7 @@ ARG VLLM_CPU_AVX512
 ARG VLLM_CPU_AVX512BF16
 ARG VLLM_CPU_AVX512VNNI
 ARG VLLM_CPU_AMXBF16
+ARG VLLM_CPU_ARM_BF16
 ARG PYTHON_VERSION
 
 LABEL ai.vllm.build.target-arch="${TARGETARCH}"
@@ -233,6 +248,7 @@ LABEL ai.vllm.build.cpu-avx512="${VLLM_CPU_AVX512:-false}"
 LABEL ai.vllm.build.cpu-avx512bf16="${VLLM_CPU_AVX512BF16:-false}"
 LABEL ai.vllm.build.cpu-avx512vnni="${VLLM_CPU_AVX512VNNI:-false}"
 LABEL ai.vllm.build.cpu-amxbf16="${VLLM_CPU_AMXBF16:-false}"
+LABEL ai.vllm.build.cpu-arm-bf16="${VLLM_CPU_ARM_BF16:-false}"
 LABEL ai.vllm.build.python-version="${PYTHON_VERSION:-3.12}"
 
 ENTRYPOINT ["vllm", "serve"]
diff --git a/docs/getting_started/installation/cpu.arm.inc.md b/docs/getting_started/installation/cpu.arm.inc.md
index e331d87a7..ae7d648b0 100644
--- a/docs/getting_started/installation/cpu.arm.inc.md
+++ b/docs/getting_started/installation/cpu.arm.inc.md
@@ -172,25 +172,78 @@ docker pull public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:${VLLM_COMMIT}-arm64-
 
 # --8<-- [end:pre-built-images]
 # --8<-- [start:build-image-from-source]
+
+## Building for your target ARM CPU
+
 ```bash
 docker build -f docker/Dockerfile.cpu \
-        --tag vllm-cpu-env .
+        --platform=linux/arm64 \
+        --build-arg VLLM_CPU_ARM_BF16=<false (default)|true> \
+        --tag vllm-cpu-env \
+        --target vllm-openai .
+```
 
-# Launching OpenAI server
+!!! note "Auto-detection by default"
+    By default, ARM CPU instruction sets (BF16, NEON, etc.) are automatically detected from the build system's CPU flags. The `VLLM_CPU_ARM_BF16` build argument is used for cross-compilation:
+
+    - `VLLM_CPU_ARM_BF16=true` - Force-enable ARM BF16 support (build with BF16 regardless of build system capabilities)
+    - `VLLM_CPU_ARM_BF16=false` - Rely on auto-detection (default)
+
+### Examples
+
+**Auto-detection build (native ARM)**
+
+```bash
+# Building on ARM64 system - platform auto-detected
+docker build -f docker/Dockerfile.cpu \
+        --tag vllm-cpu-arm64 \
+        --target vllm-openai .
+```
+
+**Cross-compile for ARM with BF16 support**
+
+```bash
+# Building on an ARM64 host (whose CPU may lack BF16) for newer ARM CPUs with BF16
+docker build -f docker/Dockerfile.cpu \
+        --build-arg VLLM_CPU_ARM_BF16=true \
+        --tag vllm-cpu-arm64-bf16 \
+        --target vllm-openai .
+```
+
+**Cross-compile from x86_64 to ARM64 with BF16**
+
+```bash
+# Requires Docker buildx with ARM emulation (QEMU)
+docker buildx build -f docker/Dockerfile.cpu \
+        --platform=linux/arm64 \
+        --build-arg VLLM_CPU_ARM_BF16=true \
+        --build-arg max_jobs=4 \
+        --tag vllm-cpu-arm64-bf16 \
+        --target vllm-openai \
+        --load .
+```
+
+!!! note "ARM BF16 requirements"
+    ARM BF16 support requires a CPU that implements FEAT_BF16 (optional from Armv8.2-A, mandatory from Armv8.6-A). Supported on AWS Graviton3/4, AmpereOne, and other recent ARM processors.
+
+## Launching the OpenAI server
+
+```bash
 docker run --rm \
-            --privileged=true \
+            --security-opt seccomp=unconfined \
+            --cap-add SYS_NICE \
             --shm-size=4g \
             -p 8000:8000 \
             -e VLLM_CPU_KVCACHE_SPACE=<KV cache space> \
             -e VLLM_CPU_OMP_THREADS_BIND=<CPU cores for inference> \
-            vllm-cpu-env \
-            --model=meta-llama/Llama-3.2-1B-Instruct \
+            vllm-cpu-arm64 \
+            meta-llama/Llama-3.2-1B-Instruct \
             --dtype=bfloat16 \
             other vLLM OpenAI server arguments
 ```
 
-!!! tip
-    An alternative of `--privileged=true` is `--cap-add SYS_NICE --security-opt seccomp=unconfined`.
+!!! tip "Alternative to --privileged"
+    The command above uses `--cap-add SYS_NICE --security-opt seccomp=unconfined` instead of `--privileged=true` for better security.
 
 # --8<-- [end:build-image-from-source]
 # --8<-- [start:extra-information]