[Misc] Removed force_fp8_e4m3fnuz from FP8LinearOp (#23725)

Signed-off-by: Julien Lin <jullin@nvidia.com>
Signed-off-by: Luka Govedič <ProExpertProg@users.noreply.github.com>
Co-authored-by: Luka Govedič <ProExpertProg@users.noreply.github.com>
This commit is contained in:
nvjullin
2025-09-04 21:25:40 +08:00
committed by GitHub
parent c9f7081f9c
commit 37241077d5
5 changed files with 45 additions and 30 deletions

View File

@@ -17,6 +17,7 @@ from contextlib import contextmanager, suppress
from multiprocessing import Process
from pathlib import Path
from typing import Any, Callable, Literal, Optional, Union
from unittest.mock import patch
import cloudpickle
import httpx
@@ -1077,3 +1078,11 @@ def get_attn_backend_list_based_on_platform() -> list[str]:
return attn_backend_list
else:
raise ValueError("Unsupported platform")
@contextmanager
def override_cutlass_fp8_supported(value: bool):
    """Temporarily force ``cutlass_fp8_supported`` to report ``value``.

    While the ``with`` body runs, the ``cutlass_fp8_supported`` attribute of
    the ``w8a8_utils`` quantization module is replaced by a mock whose call
    result is ``value``. The original attribute is put back when the context
    exits, whether normally or through an exception.

    Args:
        value: The result the patched ``cutlass_fp8_supported()`` call
            should return inside the context.

    Yields:
        None. The context manager is used purely for its patching effect.
    """
    # Dotted path of the attribute to replace; resolved by ``patch`` only
    # when the context is entered.
    target = "vllm.model_executor.layers.quantization.utils.w8a8_utils.cutlass_fp8_supported"  # noqa: E501
    patcher = patch(target, return_value=value)
    with patcher:
        yield