[Feature] Enable E8M0 by Default on Hopper for DeepGEMM, 5% E2E throughput improvement (#26197)

Signed-off-by: yewentao256 <zhyanwentao@126.com>
Signed-off-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
2025-10-08 03:33:56 -04:00
parent 335b28f7d1
commit f8607863d8
3 changed files with 10 additions and 29 deletions
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@@ -629,25 +629,25 @@ def get_config(

    if quantization_config is not None:
        config.quantization_config = quantization_config
-        # auto-enable DeepGEMM UE8M0 on Hopper if model config requests it
+        # auto-enable DeepGEMM UE8M0 if model config requests it
        scale_fmt = quantization_config.get("scale_fmt", None)
        if scale_fmt in ("ue8m0",):
-            if not envs.is_set("VLLM_USE_DEEP_GEMM_E8M0_HOPPER"):
-                os.environ["VLLM_USE_DEEP_GEMM_E8M0_HOPPER"] = "1"
+            if not envs.is_set("VLLM_USE_DEEP_GEMM_E8M0"):
+                os.environ["VLLM_USE_DEEP_GEMM_E8M0"] = "1"
                logger.info_once(
                    (
                        "Detected quantization_config.scale_fmt=%s; "
-                        "enabling Hopper UE8M0."
+                        "enabling UE8M0 for DeepGEMM."
                    ),
                    scale_fmt,
                )
-            elif not envs.VLLM_USE_DEEP_GEMM_E8M0_HOPPER:
+            elif not envs.VLLM_USE_DEEP_GEMM_E8M0:
                logger.warning_once(
                    (
                        "Model config requests UE8M0 "
                        "(quantization_config.scale_fmt=%s), but "
-                        "VLLM_USE_DEEP_GEMM_E8M0_HOPPER=0 is set; "
-                        "Hopper UE8M0 disabled."
+                        "VLLM_USE_DEEP_GEMM_E8M0=0 is set; "
+                        "UE8M0 for DeepGEMM disabled."
                    ),
                    scale_fmt,
                )