[Kernel] Add ModelOpt FP4 Checkpoint Support (#12520)
Signed-off-by: Pavani Majety <pmajety@nvidia.com>
This commit is contained in:
@@ -30,12 +30,23 @@ from vllm.model_executor.utils import set_weight_attrs
|
||||
logger = init_logger(__name__)
|
||||
|
||||
# Names of the linear methods listed as supported by weight loader v2.
# NOTE(review): presumably compared against a quantization method's class
# name by code outside this view -- confirm against the callers.
# One entry per line with a trailing comma so that adding a method is a
# one-line diff. "ModelOptNvFp4LinearMethod" is the entry added together
# with ModelOpt FP4 checkpoint support (#12520).
WEIGHT_LOADER_V2_SUPPORTED = [
    "CompressedTensorsLinearMethod",
    "AWQMarlinLinearMethod",
    "AWQLinearMethod",
    "GPTQMarlinLinearMethod",
    "Fp8LinearMethod",
    "MarlinLinearMethod",
    "QQQLinearMethod",
    "GPTQMarlin24LinearMethod",
    "TPUInt8LinearMethod",
    "GPTQLinearMethod",
    "FBGEMMFp8LinearMethod",
    "ModelOptFp8LinearMethod",
    "IPEXAWQLinearMethod",
    "IPEXGPTQLinearMethod",
    "HQQMarlinMethod",
    "QuarkLinearMethod",
    "ModelOptNvFp4LinearMethod",
]
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user