[Bugfix][CI] fix typos (#34934)

Signed-off-by: 1195343015 <1195343015@qq.com>
Signed-off-by: Jiayi Yan <66017932+1195343015@users.noreply.github.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
Jiayi Yan
2026-03-06 01:05:46 +08:00
committed by GitHub
parent 8c760b6ab6
commit 6a895197fa
98 changed files with 227 additions and 366 deletions

View File

@@ -82,7 +82,7 @@ class CPUWNA16LinearKernel(MPLinearKernel):
weight = weight.permute(0, 2, 1).reshape(input_size, output_size).contiguous()
weight = pack_quantized_values_into_int32(weight, self.config.weight_type, 1)
# make 16 output channels as a block and transpose to make
# the block contigous
# the block contiguous
weight = (
weight.view(input_size, -1, 16 // pack_factor)
.permute(1, 0, 2)

View File

@@ -2540,7 +2540,7 @@ class MLACommonImpl(MLAAttentionImpl[M], Generic[M]):
)
# workspace
# |------- N tokens --------|--------- N*dcp_size tokens ----------|
# |<- use for loca_gather ->|<--------- use for allgather -------->|
# |<- use for local_gather ->|<--------- use for allgather -------->|
allgather_offset = workspace.shape[0] // (dcp_world_size + 1)
assert allgather_offset * (dcp_world_size + 1) == workspace.shape[0]
assert toks <= allgather_offset

View File

@@ -394,5 +394,5 @@ class FlashInferExperts(mk.FusedMoEExpertsModular):
def moe_sum(self, input: torch.Tensor, output: torch.Tensor) -> None:
# No support for LoRA in flashinfer_cutlass_fused_moe.
# See TODOs in flashinfer functions runMoe and runMoeMinLantency.
# See TODOs in flashinfer functions runMoe and runMoeMinLatency.
raise NotImplementedError("LoRA is not supported for flashinfer_cutlass_moe")

View File

@@ -409,7 +409,7 @@ def batched_fused_marlin_moe(
Note that the moe_align_block_size function indicates,
- What rows of the A matrix (hidden_states) to access during the
matmul, via sorted_ids output.
- What expert_id to use for each block matmul, via expert_ids ouptut.
- What expert_id to use for each block matmul, via expert_ids output.
In the batched version, the tokens are already grouped/batched by experts
they subscribe to. Due to this, we can represent the batched hidden_states

View File

@@ -606,7 +606,7 @@ class FusedMoEExperts(ABC):
"""
Whether the kernel supports deployment in particular parallel config.
Can be overriden if a kernel does not support EP, SP or some other
Can be overridden if a kernel does not support EP, SP or some other
configuration.
"""
raise NotImplementedError
@@ -620,7 +620,7 @@ class FusedMoEExperts(ABC):
"""
Whether the kernel supports a routing method (e.g. GroupedTopK).
Can be overriden by monolithic kernels that execute the router
Can be overridden by monolithic kernels that execute the router
in addition to the experts if certain routers are not supported.
"""
return True
@@ -633,7 +633,7 @@ class FusedMoEExperts(ABC):
"""
Whether a kernel supports a particular dtype for router logits input.
Can be overriden by monolithic kernels that execute the router
Can be overridden by monolithic kernels that execute the router
in addition to the experts if certain dtypes are not supported.
"""
return True

View File

@@ -1502,10 +1502,10 @@ class RowParallelLinear(LinearBase):
if self.input_is_parallel:
input_parallel = input_
else:
splitted_input = split_tensor_along_last_dim(
split_input = split_tensor_along_last_dim(
input_, num_partitions=self.tp_size
)
input_parallel = splitted_input[self.tp_rank].contiguous()
input_parallel = split_input[self.tp_rank].contiguous()
# Matrix multiply.
assert self.quant_method is not None

View File

@@ -35,7 +35,7 @@ class MultiHeadLatentAttentionWrapper(PluggableLayer):
"""Pluggable MLA layer which allows OOT backends to add
custom implementations of the outer MLA layer (including rope & o_proj).
Note that currently oot platforms can still use CustomOp.register_oot to
replace MLA layer entirly, although we use PluggableLayer to register
replace MLA layer entirely, although we use PluggableLayer to register
this layer now.
This class takes positions and hidden_states as input.

View File

@@ -191,7 +191,7 @@ class CompressedTensorsConfig(QuantizationConfig):
"""
Helper function to update target_scheme_map
since linear layers get fused into FusedMoE
targetting 'Linear' needs to also match
targeting 'Linear' needs to also match
FusedMoE modules.
"""
if (

View File

@@ -2445,7 +2445,7 @@ class CompressedTensorsW4A8Fp8MoEMethod(CompressedTensorsMoEMethod):
w2_scale=layer.w2_weight_scale, # group scale
g1_alphas=layer.w13_weight_chan_scale,
g2_alphas=layer.w2_weight_chan_scale,
per_act_token_quant=True, # always use dynamc per-token
per_act_token_quant=True, # always use dynamic per-token
per_out_ch_quant=True, # always use per-channel
)

View File

@@ -261,7 +261,7 @@ class CPUAWQLinearMethod(LinearMethodBase):
zeros = pack_cols(zeros, bits, group_num, output_size).contiguous()
# make 16 output channels as a block and transpose to
# the make the block contigous
# the make the block contiguous
weight = pack_cols(weight, bits, input_size, output_size)
weight = (
weight.view(input_size, -1, 16 // pack_factor)

View File

@@ -199,7 +199,7 @@ class TorchAOConfig(QuantizationConfig):
@classmethod
def from_config_dict_json(cls, config_dict_json: str) -> "TorchAOConfig":
"""Iniitalize class from a config_dict json string, got from
"""Initialize class from a config_dict json string, got from
torchao_config_object = some AOBaseConfig object
json.dumps(config_to_dict(torchao_config_object))
"""

View File

@@ -255,7 +255,7 @@ def _flashinfer_fp8_blockscale_gemm_impl(
This batch-size-dependent selection is essential for maintaining model accuracy.
Benchmarks on GSM8K show a significant accuracy gap (88% vs 95%) for DeepSeek-V3.1
when using FlashInfer's DeepGEMM on M>=32. The M < 32 strategy fixes the accurracy
when using FlashInfer's DeepGEMM on M>=32. The M < 32 strategy fixes the accuracy
drop.
Args:

View File

@@ -39,7 +39,7 @@ def query_machete_supported_group_sizes(act_type: torch.dtype) -> list[int]:
def check_machete_supports_shape(
in_features: int, out_featrues: int
in_features: int, out_features: int
) -> tuple[bool, str | None]:
if in_features % MACHETE_PREPACKED_BLOCK_SHAPE[0] != 0:
return (
@@ -47,7 +47,7 @@ def check_machete_supports_shape(
"Input features size must be divisible by "
f"{MACHETE_PREPACKED_BLOCK_SHAPE[0]}",
)
if out_featrues % MACHETE_PREPACKED_BLOCK_SHAPE[1] != 0:
if out_features % MACHETE_PREPACKED_BLOCK_SHAPE[1] != 0:
return (
False,
"Output features size must be divisible by "

View File

@@ -237,7 +237,7 @@ class ApplyRotaryEmb(CustomOp):
Arguments of apply_rotary_emb() in vllm_flash_attn:
x: [batch_size, seq_len, nheads, headdim]
cos, sin: [seqlen_rotary, rotary_dim / 2]
interleaved: defalut as False (Neox-style).
interleaved: default as False (Neox-style).
...
"""
interleaved = not self.is_neox_style
@@ -259,7 +259,7 @@ class ApplyRotaryEmb(CustomOp):
Arguments of apply_rotary() in flash_attn:
x: [batch_size, seq_len, nheads, headdim]
cos, sin: [seqlen_rotary, rotary_dim / 2]
interleaved: defalut as False (Neox-style).
interleaved: default as False (Neox-style).
...
"""
interleaved = not self.is_neox_style

View File

@@ -342,7 +342,7 @@ class Ernie4_5_VLMoeMoE(nn.Module):
visual_token_mask = visual_token_mask.repeat(1, self.hidden_size).bool()
text_token_mask = ~visual_token_mask
final_experts_hidden_states = torch.zeros_like(hidden_states)
final_shared_ouput = (
final_shared_output = (
torch.zeros_like(hidden_states) if self.has_shared_experts else None
)
@@ -356,26 +356,26 @@ class Ernie4_5_VLMoeMoE(nn.Module):
text_router_logits, _ = self.text_experts_gate(
text_hidden_states.to(dtype=torch.float32)
)
text_shared_ouput, text_experts_output = self.text_experts(
text_shared_output, text_experts_output = self.text_experts(
hidden_states=text_hidden_states, router_logits=text_router_logits
)
final_experts_hidden_states[text_token_mask] = text_experts_output.flatten()
if self.has_shared_experts:
final_shared_ouput[text_token_mask] = text_shared_ouput.flatten()
final_shared_output[text_token_mask] = text_shared_output.flatten()
vision_router_logits, _ = self.vision_experts_gate(
vision_hidden_states.to(dtype=torch.float32)
)
vision_shared_ouput, vision_experts_output = self.vision_experts(
vision_shared_output, vision_experts_output = self.vision_experts(
hidden_states=vision_hidden_states, router_logits=vision_router_logits
)
final_experts_hidden_states[visual_token_mask] = (
vision_experts_output.flatten()
)
if self.has_shared_experts:
final_shared_ouput[visual_token_mask] = vision_shared_ouput.flatten()
final_shared_output[visual_token_mask] = vision_shared_output.flatten()
final_hidden_states = (final_shared_ouput, final_experts_hidden_states)
final_hidden_states = (final_shared_output, final_experts_hidden_states)
else:
# only text modal input
text_router_logits, _ = self.text_experts_gate(

View File

@@ -107,7 +107,7 @@ class Conv2dSubsampling(nn.Module):
)
self.subsampling = 4
left_context = right_context = 3 # both exclude currect frame
left_context = right_context = 3 # both exclude current frame
self.context = left_context + 1 + right_context # 7
def forward(

View File

@@ -115,7 +115,7 @@ class EncoderLayerSANM(nn.Module):
hidden_states: torch.Tensor,
mask: torch.Tensor | None = None,
cache=None,
mask_shfit_chunk=None,
mask_shift_chunk=None,
mask_att_chunk_encoder=None,
):
residual = hidden_states
@@ -125,14 +125,14 @@ class EncoderLayerSANM(nn.Module):
hidden_states = residual + self.self_attn(
hidden_states,
mask,
mask_shfit_chunk=mask_shfit_chunk,
mask_shift_chunk=mask_shift_chunk,
mask_att_chunk_encoder=mask_att_chunk_encoder,
)
else:
hidden_states = self.self_attn(
hidden_states,
mask,
mask_shfit_chunk=mask_shfit_chunk,
mask_shift_chunk=mask_shift_chunk,
mask_att_chunk_encoder=mask_att_chunk_encoder,
)
@@ -140,7 +140,7 @@ class EncoderLayerSANM(nn.Module):
hidden_states = self.norm2(hidden_states)
hidden_states = residual + self.feed_forward(hidden_states)
return hidden_states, mask, cache, mask_shfit_chunk, mask_att_chunk_encoder
return hidden_states, mask, cache, mask_shift_chunk, mask_att_chunk_encoder
class MultiHeadedAttentionSANM(nn.Module):
@@ -183,13 +183,13 @@ class MultiHeadedAttentionSANM(nn.Module):
self,
inputs: torch.Tensor,
mask: torch.Tensor,
mask_shfit_chunk: torch.Tensor = None,
mask_shift_chunk: torch.Tensor = None,
):
b, t, d = inputs.size()
if mask is not None:
mask = torch.reshape(mask, (b, -1, 1))
if mask_shfit_chunk is not None:
mask = mask * mask_shfit_chunk
if mask_shift_chunk is not None:
mask = mask * mask_shift_chunk
inputs = inputs * mask
x = inputs.transpose(1, 2)
@@ -243,11 +243,11 @@ class MultiHeadedAttentionSANM(nn.Module):
self,
hidden_states: torch.Tensor,
mask: torch.Tensor,
mask_shfit_chunk: torch.Tensor = None,
mask_shift_chunk: torch.Tensor = None,
mask_att_chunk_encoder: torch.Tensor = None,
):
q_h, k_h, v_h, v = self.forward_qkv(hidden_states)
fsmn_memory = self.forward_fsmn(v, mask, mask_shfit_chunk)
fsmn_memory = self.forward_fsmn(v, mask, mask_shift_chunk)
q_h = q_h * self.d_k ** (-0.5)
scores = torch.matmul(q_h, k_h.transpose(-2, -1))
att_outs = self.forward_attention(v_h, scores, mask, mask_att_chunk_encoder)

View File

@@ -646,7 +646,7 @@ class IsaacImageProcessor:
return_tensors: str | TensorType | None,
**kwargs: Unpack[IsaacImageProcessorKwargs],
) -> BatchFeature:
"""Preprocess images into format compatibile with vLLM input processing."""
"""Preprocess images into format compatible with vLLM input processing."""
all_pixel_values: list[torch.Tensor] = []
all_image_grids: list[torch.Tensor] = []

View File

@@ -299,7 +299,7 @@ class KeyeVisionEmbeddings(nn.Module):
)
(
batch_size,
squence_len,
sequence_len,
channel,
height,
width,

View File

@@ -238,7 +238,7 @@ class LongcatRouter(nn.Module):
self,
config: FlashConfig,
zero_expert_num: int,
rounter_params_dtype: torch.dtype,
router_params_dtype: torch.dtype,
prefix: str = "",
):
super().__init__()
@@ -252,12 +252,12 @@ class LongcatRouter(nn.Module):
config.hidden_size,
self.n_routed_experts,
bias=config.router_bias,
params_dtype=rounter_params_dtype,
params_dtype=router_params_dtype,
quant_config=None,
prefix=f"{prefix}.classifier",
)
self.e_score_correction_bias = nn.Parameter(
torch.zeros((self.n_routed_experts), dtype=rounter_params_dtype)
torch.zeros((self.n_routed_experts), dtype=router_params_dtype)
)
def forward(self, hidden_states):
@@ -281,14 +281,14 @@ class LongcatMoe(nn.Module):
super().__init__()
self.hidden_size = hidden_size
# Gate always runs at half / full precision for now.
self.rounter_params_dtype = params_dtype
self.router_params_dtype = params_dtype
if config.router_dtype == "float32":
self.rounter_params_dtype = torch.float32
self.router_params_dtype = torch.float32
self.router = LongcatRouter(
config=config,
zero_expert_num=config.zero_expert_num,
rounter_params_dtype=self.rounter_params_dtype,
router_params_dtype=self.router_params_dtype,
prefix=f"{prefix}.gate",
)
@@ -309,7 +309,7 @@ class LongcatMoe(nn.Module):
prefix=f"{prefix}.experts",
enable_eplb=enable_eplb,
routed_scaling_factor=config.routed_scaling_factor,
router_logits_dtype=self.rounter_params_dtype,
router_logits_dtype=self.router_params_dtype,
)
def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
@@ -329,7 +329,7 @@ class LongcatMoe(nn.Module):
hidden_states_padded = hidden_states
router_logits_full = self.router(
hidden_states_padded.to(self.rounter_params_dtype)
hidden_states_padded.to(self.router_params_dtype)
)
# ZeroExpertFusedMoE handles routing memoization and zero expert computation

View File

@@ -1321,14 +1321,14 @@ def get_image_size(image: ImageInput) -> ImageSize:
raise ValueError(f"Unknown image type: {type(image)}")
def exif_tranpose(
def exif_transpose(
images: ImageInput | None,
) -> ImageInput | None:
if images is None:
return None
if images is not None and isinstance(images, (list, tuple)):
images = [
exif_tranpose(img) if isinstance(img, Image) else img for img in images
exif_transpose(img) if isinstance(img, Image) else img for img in images
]
elif images is not None and isinstance(images, Image):
images = ImageOps.exif_transpose(images)
@@ -1667,7 +1667,7 @@ class Molmo2ProcessorWrapper:
**kwargs: object,
) -> BatchFeature:
inputs = [text]
images = exif_tranpose(images)
images = exif_transpose(images)
if getattr(self.processor, "image_processor", None) is not None:
inputs.append(images)
if getattr(self.processor, "video_processor", None) is not None:
@@ -2352,7 +2352,7 @@ class Molmo2MultiModalProcessor(BaseMultiModalProcessor[Molmo2ProcessingInfo]):
def get_image_replacement_molmo2(item_idx: int) -> list[int]:
images = mm_items.get_items("image", ImageProcessorItems)
image = images.get(item_idx)
image = exif_tranpose(image)
image = exif_transpose(image)
resize_nrows, resize_cols = processor.get_base_grid_size(is_video=False)
if use_single_crop_col_tokens is not None:

View File

@@ -349,7 +349,7 @@ class NemotronHMoEDecoderLayer(nn.Module):
super().__init__()
self.config = config
# Get per-layer config for heterogeneous models if exsist
# Get per-layer config for heterogeneous models if exists
get_layer_config = getattr(config, "get_nemotron_h_config_for_layer", None)
layer_config = get_layer_config(layer_idx) if get_layer_config else config
@@ -517,7 +517,7 @@ class NemotronHAttentionDecoderLayer(nn.Module):
) -> None:
super().__init__()
# Get per-layer config for heterogeneous models if exsist
# Get per-layer config for heterogeneous models if exists
get_layer_config = getattr(config, "get_nemotron_h_config_for_layer", None)
layer_config = get_layer_config(layer_idx) if get_layer_config else config

View File

@@ -486,7 +486,7 @@ class SiglipVisionEmbeddings(nn.Module):
)
(
batch_size,
squence_len,
sequence_len,
channel,
height,
width,

View File

@@ -689,19 +689,19 @@ class ConformerEncoder(TransformerEncoderBase):
default False.
ext_pw_out_channel: int, optional
the number of channel for CNN
before depthwise_seperable_CNN.
before depthwise_separable_CNN.
If 0 then use linear. default 0.
ext_pw_kernel_size: int, optional
kernel size of N before depthwise_seperable_CNN.
kernel size of N before depthwise_separable_CNN.
only work for ext_pw_out_channel > 0.
default 1
depthwise_seperable_out_channel: int, optional
the number of channel for
depthwise_seperable_CNN.
depthwise_separable_CNN.
default 256.
depthwise_multiplier: int, optional
the number of multiplier for
depthwise_seperable_CNN.
depthwise_separable_CNN.
default 1.
chunk_se: int, optional
0 for offline SE.
@@ -711,7 +711,7 @@ class ConformerEncoder(TransformerEncoderBase):
by only the current chunk.
default 0.
kernel_size: int, optional
the number of kernels for depthwise_seperable_CNN.
the number of kernels for depthwise_separable_CNN.
default 3.
activation: str, optional
FeedForward block activation.
@@ -721,7 +721,7 @@ class ConformerEncoder(TransformerEncoderBase):
activation function used in ConvModule part
of the conformer, default "relu".
conv_glu_type: str, optional
activation used use glu in depthwise_seperable_CNN,
activation used use glu in depthwise_separable_CNN,
default "sigmoid"
bias_in_glu: bool, optional
if set to True, use additive bias in the weight module

View File

@@ -217,8 +217,8 @@ class GLUPointWiseConv(nn.Module):
return x
class DepthWiseSeperableConv1d(nn.Module):
"""DepthWiseSeperableConv1d module used in Convnet module
class DepthWiseSeparableConv1d(nn.Module):
"""DepthWiseSeparableConv1d module used in ConvNet module
for the conformer, for more details see:
https://arxiv.org/pdf/2005.08100v1.pdf
@@ -390,7 +390,7 @@ class ConvModule(nn.Module):
else:
padding = (kernel_size - 1) // 2
self.dw_sep_conv_1d = DepthWiseSeperableConv1d(
self.dw_sep_conv_1d = DepthWiseSeparableConv1d(
input_dim,
depthwise_seperable_out_channel,
kernel_size,

View File

@@ -916,7 +916,7 @@ class Qwen2VLProcessingInfo(BaseProcessingInfo):
self, max_pixels: int | None = None
) -> ImageSize:
# NOTE: Simply processing a huge size with _get_vision_info might not give a
# size that maximizes the number of featrues, i.e., the number of (merged)
# size that maximizes the number of features, i.e., the number of (merged)
# patches. This is because the number of patches limits the allowed aspect
# ratios. For example, suppose the maximum number of patches is 1280. A square
# image cannot be broken down into 1280 patches, so feeding a giant square image

View File

@@ -459,14 +459,14 @@ class Step3VLProcessor:
image_inputs = {}
text_inputs = self.tokenizer(text)
else:
splitted_images_data = self._split_images(images)
split_images_data = self._split_images(images)
pixel_values_lst = []
patch_pixel_values_lst = []
patch_newline_mask_lst = []
image_repl_str_lst = []
image_repl_ids_lst = []
num_patches = []
for raw_img, img_patches, patch_newline_mask in splitted_images_data:
for raw_img, img_patches, patch_newline_mask in split_images_data:
pixel_values_lst.extend(self._convert_images_to_pixel_values([raw_img]))
if len(img_patches) > 0:

View File

@@ -353,7 +353,7 @@ class FusedMoEBlock(nn.Module):
if swiglu_limit not in (None, 0):
swiglu_limit = float(swiglu_limit)
assert swiglu_limit == 7.0, (
"Swiglu limit in fused moe block only suport 7.0 now."
"Swiglu limit in fused moe block only support 7.0 now."
)
activation = "swiglustep"
logger.debug(