[Feature]: Support NVIDIA ModelOpt HF FP8 variants FP8_PER_CHANNEL_PER_TOKEN and FP8_PB_WO in vLLM (#30957)
This commit is contained in:
@@ -53,6 +53,8 @@ WEIGHT_LOADER_V2_SUPPORTED = [
     "GPTQLinearMethod",
     "FBGEMMFp8LinearMethod",
     "ModelOptFp8LinearMethod",
+    "ModelOptFp8PcPtLinearMethod",
+    "ModelOptFp8PbWoLinearMethod",
     "IPEXAWQLinearMethod",
     "IPEXGPTQLinearMethod",
     "HQQMarlinMethod",
Reference in New Issue
Block a user