[Kernel] W8A16 Int8 inside FusedMoE (#7415)
@@ -243,7 +243,8 @@ class ModelConfig:
         rocm_supported_quantization = ["gptq", "squeezellm", "fp8"]
         optimized_quantization_methods = [
             "fp8", "marlin", "gptq_marlin_24", "gptq_marlin", "awq_marlin",
-            "fbgemm_fp8", "compressed_tensors", "compressed-tensors"
+            "fbgemm_fp8", "compressed_tensors", "compressed-tensors",
+            "experts_int8"
         ]
         tpu_supported_quantization = ["tpu_int8"]
         if self.quantization is not None:
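For context, "experts_int8" is the W8A16 scheme named in the commit title: MoE expert weights are stored as int8 and dequantized to the 16-bit activation precision inside the fused MoE kernel. The sketch below illustrates the general W8A16 idea in plain PyTorch, assuming symmetric per-output-channel scales; the function names and shapes are hypothetical for illustration and are not vLLM's actual kernel code.

import torch

def quantize_w8a16(weight: torch.Tensor):
    # Symmetric per-output-channel int8 quantization of a 16-bit weight.
    # One scale per row, chosen so the channel's max magnitude maps to 127.
    scale = weight.abs().amax(dim=1, keepdim=True).clamp(min=1e-8) / 127.0
    qweight = torch.round(weight / scale).clamp(-128, 127).to(torch.int8)
    return qweight, scale.to(weight.dtype)

def w8a16_linear(x: torch.Tensor, qweight: torch.Tensor, scale: torch.Tensor):
    # Dequantize on the fly and run the matmul at activation precision:
    # weight memory is halved while activations stay in 16 bits.
    return x @ (qweight.to(x.dtype) * scale).t()

# Round-trip check on a single (hypothetical) expert projection:
w = torch.randn(11008, 4096, dtype=torch.float16)
qw, s = quantize_w8a16(w)
x = torch.randn(8, 4096, dtype=torch.float16)
y = w8a16_linear(x, qw, s)  # approximates x @ w.t() up to int8 rounding error

Listing the method in optimized_quantization_methods tells this config check that an optimized implementation exists, so selecting quantization="experts_int8" should no longer fall through to the slow-quantization warning path.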