[Misc] Update gptq_marlin to use new vLLMParameters (#7281)

This commit is contained in:
Dipika Sikka
2024-08-13 14:30:11 -04:00
committed by GitHub
parent 181abbc27d
commit fb377d7e74
8 changed files with 234 additions and 98 deletions

View File

@@ -20,7 +20,9 @@ from vllm.model_executor.utils import set_weight_attrs
logger = init_logger(__name__)
-WEIGHT_LOADER_V2_SUPPORTED = ["CompressedTensorsLinearMethod"]
+WEIGHT_LOADER_V2_SUPPORTED = [
+    "CompressedTensorsLinearMethod", "GPTQMarlinLinearMethod"
+]
def adjust_marlin_shard(param, shard_size, shard_offset):