[Feature]: Support NVIDIA ModelOpt HF FP8 variants FP8_PER_CHANNEL_PER_TOKEN and FP8_PB_WO in vLLM (#30957)

This commit is contained in:
CedricHuang
2025-12-22 11:34:49 +08:00
committed by GitHub
parent 097978a15d
commit 19cc9468fd
5 changed files with 437 additions and 15 deletions

View File

@@ -53,6 +53,8 @@ WEIGHT_LOADER_V2_SUPPORTED = [
"GPTQLinearMethod",
"FBGEMMFp8LinearMethod",
"ModelOptFp8LinearMethod",
"ModelOptFp8PcPtLinearMethod",
"ModelOptFp8PbWoLinearMethod",
"IPEXAWQLinearMethod",
"IPEXGPTQLinearMethod",
"HQQMarlinMethod",