[Misc] Removed force_fp8_e4m3fnuz from FP8LinearOp (#23725)
Signed-off-by: Julien Lin <jullin@nvidia.com>
Signed-off-by: Luka Govedič <ProExpertProg@users.noreply.github.com>
Co-authored-by: Luka Govedič <ProExpertProg@users.noreply.github.com>
This commit is contained in:
@@ -17,6 +17,7 @@ from contextlib import contextmanager, suppress
|
||||
from multiprocessing import Process
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable, Literal, Optional, Union
|
||||
from unittest.mock import patch
|
||||
|
||||
import cloudpickle
|
||||
import httpx
|
||||
@@ -1077,3 +1078,11 @@ def get_attn_backend_list_based_on_platform() -> list[str]:
|
||||
return attn_backend_list
|
||||
else:
|
||||
raise ValueError("Unsupported platform")
|
||||
|
||||
|
||||
@contextmanager
def override_cutlass_fp8_supported(value: bool):
    """Temporarily make ``cutlass_fp8_supported`` return ``value``.

    While the context is active, the ``cutlass_fp8_supported`` attribute of
    the ``w8a8_utils`` module is replaced (via ``unittest.mock.patch``) by a
    mock whose call result is ``value``. The patch is undone when the
    ``with`` block exits.

    Args:
        value: The result the patched ``cutlass_fp8_supported`` should
            return for every call made inside the context.
    """
    # Fully qualified name of the attribute being replaced.
    patch_target = "vllm.model_executor.layers.quantization.utils.w8a8_utils.cutlass_fp8_supported"
    patcher = patch(patch_target, return_value=value)
    with patcher:
        # Hand control back to the caller while the patch is in effect.
        yield
|
||||
|
||||
Reference in New Issue
Block a user