[Quant] Add SupportsQuant to phi3 and clip (#13104)

This commit is contained in:
Kyle Sayers
2025-02-15 22:28:33 -05:00
committed by GitHub
parent 80f63a3966
commit 12913d17ba
25 changed files with 67 additions and 13 deletions

View File

@@ -20,6 +20,7 @@ class NeuronQuantConfig(QuantizationConfig):
dequant_dtype: str = "f16",
quantize_method: str = "vector_dynamic",
) -> None:
super().__init__()
self.quant_dtype = os.getenv("NEURON_QUANT_DTYPE", "s8")
if self.quant_dtype not in SUPPORTED_QUANT_DTYPE_LIST:
raise ValueError(