[ Kernel ] Enable Dynamic Per Token fp8 (#6547)
This commit is contained in:
@@ -214,7 +214,8 @@ class Fp8LinearMethod(LinearMethodBase):
|
||||
weight_scale=layer.weight_scale,
|
||||
input_scale=layer.input_scale,
|
||||
bias=bias,
|
||||
cutlass_fp8_supported=self.cutlass_fp8_supported)
|
||||
cutlass_fp8_supported=self.cutlass_fp8_supported,
|
||||
use_per_token_if_dynamic=False)
|
||||
|
||||
|
||||
class Fp8MoEMethod(FusedMoEMethodBase):
|
||||
|
||||
Reference in New Issue
Block a user