[NVIDIA] Support nvfp4 quantization (#12784)

2025-02-12 19:51:51 -08:00
parent 9f9704dca6
commit 4fc5c23bb6
12 changed files with 688 additions and 13 deletions
--- a/tests/test_scalartype.py
+++ b/tests/test_scalartype.py
@@ -11,6 +11,7 @@ from vllm.scalar_type import scalar_types
    (0, 15, scalar_types.uint4),
    (-8, 7, scalar_types.uint4b8),
    (-128, 127, scalar_types.uint8b128),
+    (-6., 6., scalar_types.float4_e2m1fn),
    (-28., 28., scalar_types.float6_e3m2f),
    (torch.int8, scalar_types.int8),
    (torch.uint8, scalar_types.uint8),