[Misc] Update gptq_marlin to use new vLLMParameters (#7281)

2024-08-13 14:30:11 -04:00
parent 181abbc27d
commit fb377d7e74
8 changed files with 234 additions and 98 deletions
--- a/vllm/model_executor/layers/linear.py
+++ b/vllm/model_executor/layers/linear.py
@@ -20,7 +20,9 @@ from vllm.model_executor.utils import set_weight_attrs

 logger = init_logger(__name__)

-WEIGHT_LOADER_V2_SUPPORTED = ["CompressedTensorsLinearMethod"]
+WEIGHT_LOADER_V2_SUPPORTED = [
+    "CompressedTensorsLinearMethod", "GPTQMarlinLinearMethod"
+]


 def adjust_marlin_shard(param, shard_size, shard_offset):