[Model] Snowflake arctic model implementation (#4652)

Co-authored-by: Dash Desai <1723932+iamontheinet@users.noreply.github.com>
Co-authored-by: Aurick Qiao <qiao@aurick.net>
Co-authored-by: Aurick Qiao <aurick.qiao@snowflake.com>
Co-authored-by: Aurick Qiao <aurickq@users.noreply.github.com>
Co-authored-by: Cody Yu <hao.yu.cody@gmail.com>
This commit is contained in:
Hao Zhang
2024-05-09 15:37:14 -07:00
committed by GitHub
parent be0c5180ac
commit ebce310b74
8 changed files with 1042 additions and 48 deletions

View File

@@ -4,6 +4,8 @@ from vllm.model_executor.layers.quantization.aqlm import AQLMConfig
from vllm.model_executor.layers.quantization.awq import AWQConfig
from vllm.model_executor.layers.quantization.base_config import (
QuantizationConfig)
from vllm.model_executor.layers.quantization.deepspeedfp import (
DeepSpeedFPConfig)
from vllm.model_executor.layers.quantization.fp8 import Fp8Config
from vllm.model_executor.layers.quantization.gptq import GPTQConfig
from vllm.model_executor.layers.quantization.gptq_marlin import (
@@ -19,6 +21,7 @@ QUANTIZATION_METHODS: Dict[str, Type[QuantizationConfig]] = {
"squeezellm": SqueezeLLMConfig,
"gptq_marlin": GPTQMarlinConfig,
"marlin": MarlinConfig,
"deepspeedfp": DeepSpeedFPConfig
}