Quantization: support FP4 quantized models on AMD CDNA2/CDNA3 GPUs (#22527)

Signed-off-by: feng <fengli1702@gmail.com>
Signed-off-by: Michael Goin <mgoin64@gmail.com>
Co-authored-by: Michael Goin <mgoin64@gmail.com>
This commit is contained in:
Daifeng Li
2025-08-23 10:53:21 +08:00
committed by GitHub
parent f6818a92cb
commit fa78de9dc3
8 changed files with 451 additions and 5 deletions

View File

@@ -52,6 +52,7 @@ WEIGHT_LOADER_V2_SUPPORTED = [
"HQQMarlinMethod",
"QuarkLinearMethod",
"ModelOptNvFp4LinearMethod",
"PetitNvFp4LinearMethod",
]