[ Kernel ] Enable Dynamic Per Token fp8 (#6547)

This commit is contained in:
Robert Shaw
2024-07-19 19:08:15 -04:00
committed by GitHub
parent 07eb6f19f3
commit 4cc24f01b1
7 changed files with 67 additions and 38 deletions

View File

@@ -214,7 +214,8 @@ class Fp8LinearMethod(LinearMethodBase):
weight_scale=layer.weight_scale,
input_scale=layer.input_scale,
bias=bias,
-cutlass_fp8_supported=self.cutlass_fp8_supported)
+cutlass_fp8_supported=self.cutlass_fp8_supported,
+use_per_token_if_dynamic=False)
class Fp8MoEMethod(FusedMoEMethodBase):