[Kernel] Add ModelOpt FP4 Checkpoint Support (#12520)

Signed-off-by: Pavani Majety <pmajety@nvidia.com>
This commit is contained in:
Authored by Pavani Majety on 2025-03-11 22:13:11 -07:00
committed via GitHub
parent 5c538c37b2
commit debd6bbf09
10 changed files with 388 additions and 30 deletions

View File

@@ -613,7 +613,7 @@ class ModelConfig:
optimized_quantization_methods = [
"fp8", "marlin", "modelopt", "gptq_marlin_24", "gptq_marlin",
"awq_marlin", "fbgemm_fp8", "compressed_tensors",
"compressed-tensors", "experts_int8", "quark"
"compressed-tensors", "experts_int8", "quark", "nvfp4"
]
if self.quantization is not None:
self.quantization = self.quantization.lower()