diff --git a/docker/Dockerfile b/docker/Dockerfile
index 71cef521b..cc2ccc11c 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -582,7 +582,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 # This is ~1.1GB and only changes when FlashInfer version bumps
 # https://docs.flashinfer.ai/installation.html
 # From versions.json: .flashinfer.version
-ARG FLASHINFER_VERSION=0.6.3
+ARG FLASHINFER_VERSION=0.6.4
 RUN --mount=type=cache,target=/root/.cache/uv \
     uv pip install --system flashinfer-cubin==${FLASHINFER_VERSION} \
     && uv pip install --system flashinfer-jit-cache==${FLASHINFER_VERSION} \
diff --git a/docker/Dockerfile.nightly_torch b/docker/Dockerfile.nightly_torch
index b4d590016..6f6f147c4 100644
--- a/docker/Dockerfile.nightly_torch
+++ b/docker/Dockerfile.nightly_torch
@@ -217,13 +217,13 @@ RUN pip install setuptools==75.6.0 packaging==23.2 ninja==1.11.1.3 build==1.2.2.
 
 
 # build flashinfer for torch nightly from source around 10 mins
-# release version: v0.6.3
+# release version: v0.6.4
 # todo(elainewy): cache flashinfer build result for faster build
 ENV CCACHE_DIR=/root/.cache/ccache
 RUN --mount=type=cache,target=/root/.cache/ccache \
     --mount=type=cache,target=/root/.cache/uv \
     echo "git clone flashinfer..." \
-    && git clone --depth 1 --branch v0.6.3 --recursive https://github.com/flashinfer-ai/flashinfer.git \
+    && git clone --depth 1 --branch v0.6.4 --recursive https://github.com/flashinfer-ai/flashinfer.git \
     && cd flashinfer \
     && git submodule update --init --recursive \
     && echo "finish git clone flashinfer..." \
diff --git a/docker/versions.json b/docker/versions.json
index 6277e0b6f..24f4b6e7d 100644
--- a/docker/versions.json
+++ b/docker/versions.json
@@ -68,7 +68,7 @@
       "default": "true"
     },
     "FLASHINFER_VERSION": {
-      "default": "0.6.3"
+      "default": "0.6.4"
     },
     "GDRCOPY_CUDA_VERSION": {
       "default": "12.8"
diff --git a/requirements/cuda.txt b/requirements/cuda.txt
index 15e4ebbf4..84fe34730 100644
--- a/requirements/cuda.txt
+++ b/requirements/cuda.txt
@@ -10,4 +10,4 @@ torchaudio==2.10.0
 # These must be updated alongside torch
 torchvision==0.25.0 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
 # FlashInfer should be updated together with the Dockerfile
-flashinfer-python==0.6.3
+flashinfer-python==0.6.4
diff --git a/vllm/model_executor/models/config.py b/vllm/model_executor/models/config.py
index e67a77005..27cf3a792 100644
--- a/vllm/model_executor/models/config.py
+++ b/vllm/model_executor/models/config.py
@@ -536,34 +536,12 @@ class HybridAttentionMambaModelConfig(VerifyAndUpdateConfig):
             )
 
 
-class DeepseekV3ForCausalLM(VerifyAndUpdateConfig):
-    @classmethod
-    def verify_and_update_config(cls, vllm_config: "VllmConfig") -> None:
-        """Disable AR-RMS-Quant fusion for DeepSeekV3 in NVFP4"""
-        # TODO: https://github.com/vllm-project/vllm/issues/34395
-
-        # disable AR-rms-fp4 fusion for DSv3+
-        ar_rms_enabled = vllm_config.compilation_config.pass_config.fuse_allreduce_rms
-        nvfp4 = vllm_config.model_config.is_nvfp4_quantized()
-
-        # Disable by default, warn if manually enabled:
-        if ar_rms_enabled is None and nvfp4:
-            vllm_config.compilation_config.pass_config.fuse_allreduce_rms = False
-        if ar_rms_enabled and nvfp4:
-            logger.warning(
-                "Allreduce-rms fusion broken for DeepSeekV3 with NVFP4 quant,"
-                "see https://github.com/vllm-project/vllm/issues/34395."
-            )
-
-
-class DeepseekV32ForCausalLM(DeepseekV3ForCausalLM):
+class DeepseekV32ForCausalLM(VerifyAndUpdateConfig):
     @classmethod
     def verify_and_update_config(cls, vllm_config: "VllmConfig") -> None:
         """
         Updated fp8 cache to custom "fp8_ds_mla" format for DeepSeekV32
         """
-        super().verify_and_update_config(vllm_config)
-
         hf_config = vllm_config.model_config.hf_config
 
         # Mirror the check in vllm/model_executor/models/deepseek_v2.py
@@ -654,7 +632,6 @@ MODELS_CONFIG_MAP: dict[str, type[VerifyAndUpdateConfig]] = {
     "MambaForCausalLM": MambaModelConfig,
     "Mamba2ForCausalLM": MambaModelConfig,
     "FalconMambaForCausalLM": MambaModelConfig,
-    "DeepseekV3ForCausalLM": DeepseekV3ForCausalLM,
     "DeepseekV32ForCausalLM": DeepseekV32ForCausalLM,
     "NemotronHForCausalLM": NemotronHForCausalLMConfig,
     "NemotronHPuzzleForCausalLM": NemotronHForCausalLMConfig,