diff --git a/vllm/config/__init__.py b/vllm/config/__init__.py
index 3f63bd2dc..f62e4468e 100644
--- a/vllm/config/__init__.py
+++ b/vllm/config/__init__.py
@@ -3881,7 +3881,7 @@ def contains_object_print(text):
     Check if the text looks like a printed Python object, e.g.
     contains any substring matching the pattern: "at 0xFFFFFFF>"
     We match against 0x followed by 2-16 hex chars (there's
-    a max of 16 on a 64 bit system).
+    a max of 16 on a 64-bit system).
 
     Args:
         text (str): The text to check
diff --git a/vllm/entrypoints/openai/tool_parsers/internlm2_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/internlm2_tool_parser.py
index 63af3ff7f..37c360145 100644
--- a/vllm/entrypoints/openai/tool_parsers/internlm2_tool_parser.py
+++ b/vllm/entrypoints/openai/tool_parsers/internlm2_tool_parser.py
@@ -60,7 +60,7 @@ class Internlm2ToolParser(ToolParser):
         if '<|action_start|>' not in current_text:
             self.position = len(current_text)
             return DeltaMessage(content=delta_text)
-        # if the tool call is sended, return an empty delta message
+        # if the tool call is sent, return an empty delta message
         # to make sure the finish_reason will be sent correctly.
         if self.current_tool_id > 0:
             return DeltaMessage(content='')
diff --git a/vllm/model_executor/layers/mamba/ops/ssd_chunk_state.py b/vllm/model_executor/layers/mamba/ops/ssd_chunk_state.py
index ad58a9918..a7b3c8148 100644
--- a/vllm/model_executor/layers/mamba/ops/ssd_chunk_state.py
+++ b/vllm/model_executor/layers/mamba/ops/ssd_chunk_state.py
@@ -502,7 +502,7 @@ def _chunk_state_varlen_kernel(
         dA_cumsum_ptrs += BLOCK_SIZE_K * stride_dA_cs_csize
 
     # If the sequence starts after the last chunk idx, we don't need to add the contribution from the last chunk
-    # If HAS_INITSTATES==True need to consider two possiblties
+    # If HAS_INITSTATES==True need to consider two possibilities
    # - if start_idx < pid_c * chunk_size, then we need to take the past_states_ptrs
    # - if state_idx >= pid * chunk_size, then we need to insert initstates
    if ((start_idx < pid_c * chunk_size)  # first chunk
diff --git a/vllm/model_executor/models/arcee.py b/vllm/model_executor/models/arcee.py
index 13ed4da06..be82c2fd5 100644
--- a/vllm/model_executor/models/arcee.py
+++ b/vllm/model_executor/models/arcee.py
@@ -342,7 +342,7 @@ class ArceeModel(nn.Module):
 class ArceeForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
     """Arcee Model for causal language modeling, integrated with vLLM
     runtime."""
-    # Map fused module names to their sub-module components
+    # Map fused module names to their submodule components
     # (for quantization and LoRA)
     packed_modules_mapping = {
         "qkv_proj": ["q_proj", "k_proj", "v_proj"],
diff --git a/vllm/model_executor/models/llava_onevision.py b/vllm/model_executor/models/llava_onevision.py
index bc340a9e2..46d54452a 100644
--- a/vllm/model_executor/models/llava_onevision.py
+++ b/vllm/model_executor/models/llava_onevision.py
@@ -835,7 +835,7 @@ class LlavaOnevisionForConditionalGeneration(nn.Module, SupportsMultiModal,
             return None
 
         # The result multimodal_embeddings is tuple of tensors, with each
-        # tensor correspoending to a multimodal data item (image or video).
+        # tensor corresponding to a multimodal data item (image or video).
         multimodal_embeddings: tuple[torch.Tensor, ...] = ()
 
         # NOTE: It is important to iterate over the keys in this dictionary
diff --git a/vllm/model_executor/models/phi4_multimodal.py b/vllm/model_executor/models/phi4_multimodal.py
index 6d973a964..ab63649b4 100644
--- a/vllm/model_executor/models/phi4_multimodal.py
+++ b/vllm/model_executor/models/phi4_multimodal.py
@@ -1350,7 +1350,7 @@ class Phi4MultimodalForCausalLM(nn.Module, SupportsLoRA, SupportsMultiModal):
             return None
 
         # The result multimodal_embeddings is tuple of tensors, with each
-        # tensor correspoending to a multimodal data item (image or video).
+        # tensor corresponding to a multimodal data item (image or video).
         multimodal_embeddings: tuple[torch.Tensor, ...] = ()
 
         # NOTE: It is important to iterate over the keys in this dictionary
diff --git a/vllm/model_executor/models/phi4mm_audio.py b/vllm/model_executor/models/phi4mm_audio.py
index a2481375f..a1c452053 100644
--- a/vllm/model_executor/models/phi4mm_audio.py
+++ b/vllm/model_executor/models/phi4mm_audio.py
@@ -100,7 +100,7 @@ class ConformerEncoderLayer(nn.Module):
             activation function for glu used in the multihead attention,
             default "swish".
         activation_checkpointing: str, optional
-            a dictionarry of {"module","interval","offload"}, where
+            a dictionary of {"module","interval","offload"}, where
             "module": str
             accept ["transformer", "attention"] to select
             which module should do activation checkpointing.
diff --git a/vllm/model_executor/models/qwen2_5_omni_thinker.py b/vllm/model_executor/models/qwen2_5_omni_thinker.py
index e79428d17..f8a943d4c 100644
--- a/vllm/model_executor/models/qwen2_5_omni_thinker.py
+++ b/vllm/model_executor/models/qwen2_5_omni_thinker.py
@@ -846,7 +846,7 @@ class Qwen2_5OmniThinkerForConditionalGeneration(
             return []
 
         # The result multimodal_embeddings is tuple of tensors, with each
-        # tensor correspoending to a multimodal data item (image or video).
+        # tensor corresponding to a multimodal data item (image or video).
         multimodal_embeddings: tuple[torch.Tensor, ...] = ()
 
         # NOTE: It is important to iterate over the keys in this dictionary
@@ -873,7 +873,7 @@ class Qwen2_5OmniThinkerForConditionalGeneration(
 
         if multimodal_embeddings is not None \
             and len(multimodal_embeddings) != 0:
-            # TODO (ywang96): support overlapping modalitiy embeddings so that
+            # TODO (ywang96): support overlapping modality embeddings so that
             # `use_audio_in_video` will work on V1.
             inputs_embeds = merge_multimodal_embeddings(
                 input_ids, inputs_embeds, multimodal_embeddings, [
diff --git a/vllm/v1/attention/backends/mla/common.py b/vllm/v1/attention/backends/mla/common.py
index 440a206eb..5308a1113 100755
--- a/vllm/v1/attention/backends/mla/common.py
+++ b/vllm/v1/attention/backends/mla/common.py
@@ -463,7 +463,7 @@ class MLACommonMetadataBuilder(AttentionMetadataBuilder[M]):
             self.dcp_world_size = 1
             self.dcp_rank = 0
 
-        # Dont try to access the runner on AMD
+        # Don't try to access the runner on AMD
         if self.aot_schedule:
             self.page_size = self.kv_cache_spec.block_size