[Doc]: fixing multiple typos in diverse files (#33256)
Signed-off-by: Didier Durand <durand.didier@gmail.com>
Signed-off-by: Didier Durand <2927957+didier-durand@users.noreply.github.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
@@ -30,7 +30,7 @@ class SharedFusedMoE(FusedMoE):
|
||||
|
||||
# Disable shared expert overlap if:
|
||||
# - we are using eplb with non-default backend, because of correctness issues
|
||||
- # - we are using flashinfer with DP, since there nothint to gain
|
||||
+ # - we are using flashinfer with DP, since there nothing to gain
|
||||
# - we are using marlin kernels
|
||||
backend = self.moe_parallel_config.all2all_backend
|
||||
self.use_overlapped = (
|
||||
|
||||
@@ -22,7 +22,7 @@ class ScaledMMLinearLayerConfig:
|
||||
|
||||
@dataclass
|
||||
class Int8ScaledMMLinearLayerConfig(ScaledMMLinearLayerConfig):
|
||||
- # TODO: Chnage to QuantKey like FP8ScaledMMLinearLayerConfig
|
||||
+ # TODO: Change to QuantKey like FP8ScaledMMLinearLayerConfig
|
||||
is_static_input_scheme: bool
|
||||
is_channelwise: bool
|
||||
input_symmetric: bool
|
||||
|
||||
@@ -119,7 +119,7 @@ def choose_scaled_mm_linear_kernel(
|
||||
config (_KernelConfigT): Description of the linear layer
|
||||
to be implemented.
|
||||
possible_kernels (dict[PlatformEnum, list[_KernelT]]): A
|
||||
- dictionary of platforms and their list list of possible kernels.
|
||||
+ dictionary of platforms and their list of possible kernels.
|
||||
compute_capability (Optional[int], optional): The compute capability of
|
||||
the target device, if None uses `current_platform` to get the
|
||||
compute capability. Defaults to None.
|
||||
|
||||
Reference in New Issue
Block a user