[Doc]: fix typos in Python comments (#24417)

Signed-off-by: Didier Durand <durand.didier@gmail.com>
This commit is contained in:
Didier Durand
2025-09-08 09:22:16 +02:00
committed by GitHub
parent 2f0b833a05
commit f4962a6d55
12 changed files with 12 additions and 12 deletions

View File

@@ -302,7 +302,7 @@ class FusedMoEPrepareAndFinalize(ABC):
def max_num_tokens_per_rank(self) -> Optional[int]:
"""
Some PrepareFinalize All2All implementations are batched. Meaning,
they can processes only as set of tokens at a time. This
they can process only a set of tokens at a time. This
function returns the batch size, i.e. the maximum number of tokens
the implementation can process at a time.
Return None if there are no such restrictions.

View File

@@ -201,7 +201,7 @@ def marlin_make_workspace(output_size_per_partition: int,
def marlin_make_workspace_new(device: torch.device,
max_blocks_per_sm: int = 1) -> torch.Tensor:
# In the new marlin kernel, we use the num of threadblocks as workspace
# size. The num of threadblocks is is sms_count * max_blocks_per_sm.
# size. The num of threadblocks is sms_count * max_blocks_per_sm.
sms = torch.cuda.get_device_properties(device).multi_processor_count
return torch.zeros(sms * max_blocks_per_sm,
dtype=torch.int,