[Refactor] Make FP8 Linear Ops use kernel abstraction (#27814)
Signed-off-by: vllmellm <vllm.ellm@embeddedllm.com>
This commit is contained in:
@@ -41,7 +41,7 @@ ROCM_AITER_SUPPORTED_INT8_MODEL = [
     "nm-testing/tinyllama-oneshot-w8a8-channel-dynamic-token-v2",
 ]
 
-# TritonScaledMMLinearKernel only supports symmetric quantization.
+# TritonInt8ScaledMMLinearKernel only supports symmetric quantization.
 ROCM_TRITON_SCALED_MM_SUPPORTED_INT8_MODEL = [
     "nm-testing/tinyllama-oneshot-w8w8-test-static-shape-change",
     "nm-testing/tinyllama-oneshot-w8-channel-a8-tensor",
Reference in New Issue
Block a user