[Quantization] Add compressed-tensors NVFP4 MoE Support (#19990)
Signed-off-by: Dipika Sikka <dipikasikka1@gmail.com>
Signed-off-by: Dipika <dipikasikka1@gmail.com>
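
For context: NVFP4 stores weights as 4-bit E2M1 floats in groups of 16, with an FP8 (E4M3) scale per group on top of a single FP32 per-tensor scale. Below is a minimal, illustrative dequantization sketch of that format; the function name, array layout, and unpacked-codes representation are assumptions for demonstration, not vLLM's actual kernel code.

# Illustrative NVFP4 dequantization sketch (NOT vLLM's kernel code).
import numpy as np

# The 16 representable E2M1 values (1 sign bit, 2 exponent bits, 1 mantissa bit).
E2M1_LUT = np.array(
    [0.0, 0.5, 1.0, 1.5, 2.0, 3.0, 4.0, 6.0,
     -0.0, -0.5, -1.0, -1.5, -2.0, -3.0, -4.0, -6.0],
    dtype=np.float32)

def dequantize_nvfp4(codes: np.ndarray, group_scales: np.ndarray,
                     global_scale: float, group_size: int = 16) -> np.ndarray:
    """codes: 1-D uint8 array of 4-bit code points (one per element, unpacked).
    group_scales: one scale per contiguous group of `group_size` elements."""
    values = E2M1_LUT[codes]                      # map code points to E2M1 values
    scales = np.repeat(group_scales, group_size)  # broadcast per-group scales
    return values * scales * global_scale

# Example: one group of 16 elements, group scale 0.25, global scale 2.0.
codes = np.arange(16, dtype=np.uint8)
print(dequantize_nvfp4(codes, np.array([0.25], dtype=np.float32), 2.0))
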
@@ -17,7 +17,7 @@ from vllm.model_executor.layers.quantization.compressed_tensors.compressed_tenso
     CompressedTensorsW4A4Fp4, CompressedTensorsW4A16Fp4,
     CompressedTensorsW4A16Sparse24, CompressedTensorsW8A8Fp8,
     CompressedTensorsW8A8Int8, CompressedTensorsW8A16Fp8,
-    CompressedTensorsWNA16)
+    CompressedTensorsWNA16, cutlass_fp4_supported)
 from vllm.model_executor.layers.quantization.utils.w8a8_utils import (
     sparse_cutlass_supported)
 from vllm.platforms import current_platform
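
The import change above reflects that the CUTLASS FP4 capability check is now a module-level helper, cutlass_fp4_supported(), rather than a classmethod on CompressedTensorsW4A4Fp4. A hedged sketch of what such a check might look like, assuming it gates on CUDA compute capability (NVFP4 CUTLASS kernels target Blackwell-class GPUs); the body is illustrative, not vLLM's exact logic:

# Illustrative module-level CUTLASS FP4 capability check (assumed logic).
def cutlass_fp4_supported() -> bool:
    try:
        import torch
        if not torch.cuda.is_available():
            return False
        major, minor = torch.cuda.get_device_capability()
        return major >= 10  # Blackwell-class (SM100+) GPUs and newer
    except ImportError:
        return False
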
@@ -668,8 +668,8 @@ def test_compressed_tensors_nvfp4(vllm_runner, args):
         assert isinstance(qkv_proj.quant_method,
                           CompressedTensorsLinearMethod)
         if isinstance(qkv_proj.scheme, scheme) or isinstance(
-                qkv_proj.scheme, CompressedTensorsW4A16Fp4
-        ) and not CompressedTensorsW4A4Fp4.cutlass_fp4_supported():
+                qkv_proj.scheme,
+                CompressedTensorsW4A16Fp4) and not cutlass_fp4_supported():
             assert True
         else:
             raise AssertionError("FP4 Scheme Mismatch")
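
The rewritten condition accepts either the expected scheme or CompressedTensorsW4A16Fp4, because on hardware without CUTLASS FP4 kernels a W4A4 checkpoint is served through the W4A16 scheme. A standalone sketch of that acceptance logic follows; the scheme classes here are illustrative stubs, not vLLM's real implementations:

# Standalone sketch of the test's fallback acceptance logic (stub classes).
class CompressedTensorsW4A4Fp4: ...      # CUTLASS-backed NVFP4 scheme
class CompressedTensorsW4A16Fp4: ...     # fallback when CUTLASS FP4 is absent

def scheme_matches(actual_scheme, expected_cls, cutlass_fp4_ok: bool) -> bool:
    # The expected scheme always passes; the W4A16 fallback also passes,
    # but only when CUTLASS FP4 kernels are unavailable on this platform.
    if isinstance(actual_scheme, expected_cls):
        return True
    return (isinstance(actual_scheme, CompressedTensorsW4A16Fp4)
            and not cutlass_fp4_ok)

# Without CUTLASS FP4, a W4A4 checkpoint loading the W4A16 scheme still passes:
assert scheme_matches(CompressedTensorsW4A16Fp4(),
                      CompressedTensorsW4A4Fp4, cutlass_fp4_ok=False)
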