Refactor NVFP4 Linear utils for ModelOpt and CT (#33201)

Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
Michael Goin
2026-01-30 19:37:42 -05:00
committed by GitHub
parent 2b465570e6
commit 67ebaff528
12 changed files with 462 additions and 483 deletions

View File

@@ -25,7 +25,7 @@ from vllm.model_executor.layers.fused_moe.prepare_finalize import (
 from vllm.model_executor.layers.fused_moe.triton_deep_gemm_moe import (
     TritonOrDeepGemmExperts,
 )
-from vllm.model_executor.layers.quantization.utils.quant_utils import (
+from vllm.model_executor.layers.quantization.utils.nvfp4_utils import (
     cutlass_fp4_supported,
 )
 from vllm.model_executor.layers.quantization.utils.w8a8_utils import (