diff --git a/tests/compile/fusions_e2e/test_tp1_quant.py b/tests/compile/fusions_e2e/test_tp1_quant.py
index dff167588..f98400c2e 100644
--- a/tests/compile/fusions_e2e/test_tp1_quant.py
+++ b/tests/compile/fusions_e2e/test_tp1_quant.py
@@ -5,6 +5,7 @@ from collections.abc import Callable
 import pytest
 
 from vllm.config import PassConfig
+from vllm.utils.flashinfer import is_flashinfer_fp8_blockscale_gemm_supported
 
 from .common import (
     INDUCTOR_GRAPH_PARTITION,
@@ -50,6 +51,10 @@ def test_tp1_fp8_fusions(
     run_e2e_fusion_test,
     monkeypatch,
 ):
+    if use_deepgemm and is_flashinfer_fp8_blockscale_gemm_supported():
+        # FlashInfer block FP8 GEMM quantizes internally, so it can't
+        # be fused with other ops.
+        pytest.skip("Fusion not supported with FlashInfer block FP8 GEMM")
     if use_deepgemm and is_blackwell():
         # TODO(luka) DeepGEMM uses different quants, matching not supported
         # - on Blackwell, uses a special quant fp8, currently not supported
diff --git a/vllm/envs.py b/vllm/envs.py
index 2b341bd5b..e6b824c56 100755
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -159,7 +159,7 @@ if TYPE_CHECKING:
         "relax",
     ] = "relax"
     VLLM_USE_FUSED_MOE_GROUPED_TOPK: bool = True
-    VLLM_BLOCKSCALE_FP8_GEMM_FLASHINFER: bool = False
+    VLLM_BLOCKSCALE_FP8_GEMM_FLASHINFER: bool = True
     VLLM_USE_FLASHINFER_MOE_FP16: bool = False
     VLLM_USE_FLASHINFER_MOE_FP8: bool = False
     VLLM_USE_FLASHINFER_MOE_FP4: bool = False
@@ -1198,7 +1198,7 @@ environment_variables: dict[str, Callable[[], Any]] = {
     # Allow use of FlashInfer FP8 block-scale GEMM for linear layers.
     # This uses TensorRT-LLM kernels and requires SM90+ (Hopper).
     "VLLM_BLOCKSCALE_FP8_GEMM_FLASHINFER": lambda: bool(
-        int(os.getenv("VLLM_BLOCKSCALE_FP8_GEMM_FLASHINFER", "0"))
+        int(os.getenv("VLLM_BLOCKSCALE_FP8_GEMM_FLASHINFER", "1"))
     ),
     # Allow use of FlashInfer BF16 MoE kernels for fused moe ops.
     "VLLM_USE_FLASHINFER_MOE_FP16": lambda: bool(
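
For reviewers, a minimal standalone sketch of what the default flip means in practice: the `envs.py` entry parses the variable as `bool(int(...))`, so with the new default of `"1"` the FlashInfer block-scale FP8 GEMM path is enabled unless the user explicitly exports `VLLM_BLOCKSCALE_FP8_GEMM_FLASHINFER=0`. The helper name below is illustrative only and does not exist in vLLM.

```python
import os


def blockscale_fp8_gemm_flashinfer_enabled() -> bool:
    # Illustrative helper (not part of vLLM): mirrors the envs.py lambda,
    # which parses the env var via bool(int(...)) with a default of "1".
    return bool(int(os.getenv("VLLM_BLOCKSCALE_FP8_GEMM_FLASHINFER", "1")))


if __name__ == "__main__":
    # Unset: the FlashInfer block-scale FP8 GEMM path now defaults to on.
    print(blockscale_fp8_gemm_flashinfer_enabled())  # True

    # Explicit opt-out restores the previous (pre-change) behavior.
    os.environ["VLLM_BLOCKSCALE_FP8_GEMM_FLASHINFER"] = "0"
    print(blockscale_fp8_gemm_flashinfer_enabled())  # False
```

This also explains the new test guard: when the FlashInfer block-scale FP8 GEMM is used, quantization happens inside the kernel, so the separate quant op the fusion pass would match never appears in the graph, and the affected `test_tp1_fp8_fusions` cases are skipped.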