[Neuron] Adding support for adding/overriding neuron configuration a… (#8062)

Co-authored-by: Harsha Bikki <harbikh@amazon.com>
This commit is contained in:
Harsha vardhan manoj Bikki
2024-09-04 16:33:43 -07:00
committed by GitHub
parent 77d9e514a2
commit 008cf886c9
8 changed files with 243 additions and 42 deletions

View File

@@ -22,6 +22,8 @@ from vllm.model_executor.layers.quantization.gptq_marlin import (
from vllm.model_executor.layers.quantization.gptq_marlin_24 import (
GPTQMarlin24Config)
from vllm.model_executor.layers.quantization.marlin import MarlinConfig
from vllm.model_executor.layers.quantization.neuron_quant import (
NeuronQuantConfig)
from vllm.model_executor.layers.quantization.qqq import QQQConfig
from vllm.model_executor.layers.quantization.squeezellm import SqueezeLLMConfig
from vllm.model_executor.layers.quantization.tpu_int8 import Int8TpuConfig
@@ -46,6 +48,7 @@ QUANTIZATION_METHODS: Dict[str, Type[QuantizationConfig]] = {
"bitsandbytes": BitsAndBytesConfig,
"qqq": QQQConfig,
"experts_int8": ExpertsInt8Config,
"neuron_quant": NeuronQuantConfig,
}