[Kernel] Add ModelOpt FP4 Checkpoint Support (#12520)

Signed-off-by: Pavani Majety <pmajety@nvidia.com>
This commit is contained in:
Authored by Pavani Majety on 2025-03-11 22:13:11 -07:00
committed via GitHub
parent 5c538c37b2
commit debd6bbf09
10 changed files with 388 additions and 30 deletions

View File

@@ -613,7 +613,7 @@ class ModelConfig:
optimized_quantization_methods = [
"fp8", "marlin", "modelopt", "gptq_marlin_24", "gptq_marlin",
"awq_marlin", "fbgemm_fp8", "compressed_tensors",
"compressed-tensors", "experts_int8", "quark"
"compressed-tensors", "experts_int8", "quark", "nvfp4"
]
if self.quantization is not None:
self.quantization = self.quantization.lower()