[NVIDIA] Support nvfp4 quantization (#12784)

This commit is contained in:
Kaixi Hou
2025-02-12 19:51:51 -08:00
committed by GitHub
parent 9f9704dca6
commit 4fc5c23bb6
12 changed files with 688 additions and 13 deletions

View File

@@ -11,6 +11,7 @@ from vllm.scalar_type import scalar_types
(0, 15, scalar_types.uint4),
(-8, 7, scalar_types.uint4b8),
(-128, 127, scalar_types.uint8b128),
(-6., 6., scalar_types.float4_e2m1fn),
(-28., 28., scalar_types.float6_e3m2f),
(torch.int8, scalar_types.int8),
(torch.uint8, scalar_types.uint8),