[Doc] Fix duplicate words in comments (#36713)
Signed-off-by: Hongbin10 <jdmjdm1998@163.com>
This commit is contained in:
@@ -264,7 +264,7 @@ class DefaultMoERunner(MoERunner):
|
||||
)
|
||||
|
||||
# Record that the shared_experts_input will be used in the
|
||||
# shared_experts_stream to to avoid gc issue from
|
||||
# shared_experts_stream to avoid gc issue from
|
||||
# deallocation. For more details:
|
||||
# https://docs.pytorch.org/docs/stable/generated/torch.Tensor.record_stream.html # noqa: E501
|
||||
# NOTE: We don't need shared_output.record_stream(current_stream())
|
||||
|
||||
@@ -50,7 +50,7 @@ def swap_w13_to_w31(x: torch.Tensor) -> torch.Tensor:
|
||||
def rotate_weights_for_fi_trtllm_fp8_per_tensor_moe(
|
||||
gemm1_weights: torch.Tensor, gemm2_weights: torch.Tensor, is_gated_activation: bool
|
||||
):
|
||||
"""Shuffle weights for for FI TRT-LLM Format"""
|
||||
"""Shuffle weights for FI TRT-LLM Format"""
|
||||
from flashinfer import reorder_rows_for_gated_act_gemm, shuffle_matrix_a
|
||||
|
||||
epilogue_tile_m = 128
|
||||
|
||||
@@ -57,7 +57,7 @@ class SequenceClassificationMixin(SupportsCrossEncoding, VllmModelForPooling):
|
||||
pooler_config = vllm_config.model_config.pooler_config
|
||||
assert pooler_config is not None
|
||||
|
||||
# Certain information about the the model and classifier can only be
|
||||
# Certain information about the model and classifier can only be
|
||||
# inferred from the `ForSequenceClassification` class. Therefore, we
|
||||
# instantiate it on the "meta" device to avoid allocating GPU memory.
|
||||
with torch.device("meta"):
|
||||
|
||||
@@ -952,7 +952,7 @@ class OpenCVDynamicOpenPanguVideoBackend(VideoLoader, OpenCVVideoBackendMixin):
|
||||
frame_recovery=frame_recovery,
|
||||
)
|
||||
|
||||
# Use transformers transformers.video_utils.VideoMetadata format
|
||||
# Use transformers.video_utils.VideoMetadata format
|
||||
metadata = cls.create_hf_metadata(
|
||||
source=source,
|
||||
video_backend="opencv_dynamic",
|
||||
|
||||
@@ -44,7 +44,7 @@ def maybe_serialize_tool_calls(request: "MistralChatCompletionRequest"):
|
||||
# SEE: https://github.com/vllm-project/vllm/pull/9951
|
||||
# Credits go to: @gcalmettes
|
||||
# NOTE: There is currently a bug in pydantic where attributes
|
||||
# declared as iterables are replaced in in the instances by
|
||||
# declared as iterables are replaced in the instances by
|
||||
# pydantic-core ValidatorIterator instance. In particular, this
|
||||
# affects tool_calls defined in ChatCompletionAssistantMessageParam
|
||||
# model:
|
||||
|
||||
@@ -1055,6 +1055,6 @@ def init_worker_distributed_environment(
|
||||
parallel_config.decode_context_parallel_size,
|
||||
)
|
||||
|
||||
# Init ec connector here before KV caches caches init
|
||||
# Init ec connector here before KV caches init
|
||||
# NOTE: We do not init KV caches for Encoder-only instance in EPD disagg mode
|
||||
ensure_ec_transfer_initialized(vllm_config)
|
||||
|
||||
Reference in New Issue
Block a user