[torch.compile] Disable ar-rms fusion for ds3-fp4 & DP, fix CI test (#34392)

Signed-off-by: Luka Govedič <lgovedic@redhat.com>
Signed-off-by: Luka Govedič <ProExpertProg@users.noreply.github.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
Luka Govedič
2026-02-15 09:33:57 -05:00
committed by GitHub
parent f07a128413
commit 23d825aba1
3 changed files with 46 additions and 3 deletions

View File

@@ -1687,6 +1687,20 @@ class ModelConfig:
def is_quantized(self) -> bool:
    """Return True when ``hf_config`` has a non-``None`` ``quantization_config``.

    A missing attribute is treated the same as an attribute set to ``None``.
    """
    quant_cfg = getattr(self.hf_config, "quantization_config", None)
    return quant_cfg is not None
def is_nvfp4_quantized(self) -> bool:
    """Return True when the model is quantized with NVFP4.

    Two cases are recognized:

    * ``self.quantization`` is ``"modelopt_fp4"`` (ModelOpt NVFP4 checkpoints
      resolve to this quantization method).
    * ``self.quantization`` is ``"compressed-tensors"`` and the ``"format"``
      entry of ``self.model_arch_config.quantization_config`` contains
      ``"nvfp4"`` (case-insensitive), e.g. ``"nvfp4-pack-quantized"``.

    A missing, ``None`` or non-string ``"format"`` entry yields ``False``
    instead of raising.
    """
    # ModelOpt NVFP4 checkpoints resolve to modelopt_fp4 quantization method
    if self.quantization == "modelopt_fp4":
        return True

    if self.quantization != "compressed-tensors":
        return False

    # For Compressed Tensors we look for `"format": "nvfp4-pack-quantized"`
    # in the quantization config
    quant_config = self.model_arch_config.quantization_config
    if quant_config is None:
        return False

    # `.get("format", "")` would still hand back None for an explicit
    # `"format": null`, so validate the type before calling `.lower()`.
    quant_format = quant_config.get("format")
    return isinstance(quant_format, str) and "nvfp4" in quant_format.lower()
def get_served_model_name(model: str, served_model_name: str | list[str] | None):
"""