[ Misc ] fbgemm checkpoints (#6559)

This commit is contained in:
Robert Shaw
2024-07-20 12:36:57 -04:00
committed by GitHub
parent 9042d68362
commit 683e3cb9c4
24 changed files with 234 additions and 47 deletions

View File

@@ -10,6 +10,7 @@ from vllm.model_executor.layers.quantization.compressed_tensors.compressed_tenso
CompressedTensorsConfig)
from vllm.model_executor.layers.quantization.deepspeedfp import (
DeepSpeedFPConfig)
from vllm.model_executor.layers.quantization.fbgemm_fp8 import FBGEMMFp8Config
from vllm.model_executor.layers.quantization.fp8 import Fp8Config
from vllm.model_executor.layers.quantization.gptq import GPTQConfig
from vllm.model_executor.layers.quantization.gptq_marlin import (
@@ -24,6 +25,7 @@ QUANTIZATION_METHODS: Dict[str, Type[QuantizationConfig]] = {
"awq": AWQConfig,
"deepspeedfp": DeepSpeedFPConfig,
"fp8": Fp8Config,
"fbgemm_fp8": FBGEMMFp8Config,
# The order of the gptq methods matters: config.py iterates over these
# entries when applying override_quantization_method(..)
"marlin": MarlinConfig,