[NVIDIA] Support nvfp4 quantization (#12784)

2025-02-12 19:51:51 -08:00
parent 9f9704dca6
commit 4fc5c23bb6
12 changed files with 688 additions and 13 deletions
--- a/vllm/scalar_type.py
+++ b/vllm/scalar_type.py
@@ -321,6 +321,9 @@ class scalar_types:
    # fp6, https://github.com/usyd-fsalab/fp6_llm/tree/main
    float6_e3m2f = ScalarType.float_(3, 2, True, NanRepr.NONE)

+    # fp4 (E2M1), OCP Microscaling Formats (MX) v1.0 spec: https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf
+    float4_e2m1fn = ScalarType.float_(2, 1, True, NanRepr.NONE)
+
    # "gptq" types
    uint2b2 = ScalarType.uint(2, 2)
    uint3b4 = ScalarType.uint(3, 4)