[Doc] Fix duplicate words in comments (#36713)
Signed-off-by: Hongbin10 <jdmjdm1998@163.com>
This commit is contained in:
@@ -264,7 +264,7 @@ class DefaultMoERunner(MoERunner):
|
||||
)
|
||||
|
||||
# Record that the shared_experts_input will be used in the
|
||||
# shared_experts_stream to to avoid gc issue from
|
||||
# shared_experts_stream to avoid gc issue from
|
||||
# deallocation. For more details:
|
||||
# https://docs.pytorch.org/docs/stable/generated/torch.Tensor.record_stream.html # noqa: E501
|
||||
# NOTE: We don't need shared_output.record_stream(current_stream())
|
||||
|
||||
@@ -50,7 +50,7 @@ def swap_w13_to_w31(x: torch.Tensor) -> torch.Tensor:
|
||||
def rotate_weights_for_fi_trtllm_fp8_per_tensor_moe(
|
||||
gemm1_weights: torch.Tensor, gemm2_weights: torch.Tensor, is_gated_activation: bool
|
||||
):
|
||||
"""Shuffle weights for for FI TRT-LLM Format"""
|
||||
"""Shuffle weights for FI TRT-LLM Format"""
|
||||
from flashinfer import reorder_rows_for_gated_act_gemm, shuffle_matrix_a
|
||||
|
||||
epilogue_tile_m = 128
|
||||
|
||||
@@ -57,7 +57,7 @@ class SequenceClassificationMixin(SupportsCrossEncoding, VllmModelForPooling):
|
||||
pooler_config = vllm_config.model_config.pooler_config
|
||||
assert pooler_config is not None
|
||||
|
||||
# Certain information about the the model and classifier can only be
|
||||
# Certain information about the model and classifier can only be
|
||||
# inferred from the `ForSequenceClassification` class. Therefore, we
|
||||
# instantiate it on the "meta" device to avoid allocating GPU memory.
|
||||
with torch.device("meta"):
|
||||
|
||||
Reference in New Issue
Block a user