[Bugfix][CI] fix typos (#34934)

Signed-off-by: 1195343015 <1195343015@qq.com>
Signed-off-by: Jiayi Yan <66017932+1195343015@users.noreply.github.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Jiayi Yan authored 2026-03-06 01:05:46 +08:00; committed by GitHub
parent 8c760b6ab6
commit 6a895197fa
98 changed files with 227 additions and 366 deletions

@@ -342,7 +342,7 @@ class Ernie4_5_VLMoeMoE(nn.Module):
visual_token_mask = visual_token_mask.repeat(1, self.hidden_size).bool()
text_token_mask = ~visual_token_mask
final_experts_hidden_states = torch.zeros_like(hidden_states)
-final_shared_ouput = (
+final_shared_output = (
torch.zeros_like(hidden_states) if self.has_shared_experts else None
)
@@ -356,26 +356,26 @@ class Ernie4_5_VLMoeMoE(nn.Module):
text_router_logits, _ = self.text_experts_gate(
text_hidden_states.to(dtype=torch.float32)
)
-text_shared_ouput, text_experts_output = self.text_experts(
+text_shared_output, text_experts_output = self.text_experts(
hidden_states=text_hidden_states, router_logits=text_router_logits
)
final_experts_hidden_states[text_token_mask] = text_experts_output.flatten()
if self.has_shared_experts:
-final_shared_ouput[text_token_mask] = text_shared_ouput.flatten()
+final_shared_output[text_token_mask] = text_shared_output.flatten()
vision_router_logits, _ = self.vision_experts_gate(
vision_hidden_states.to(dtype=torch.float32)
)
-vision_shared_ouput, vision_experts_output = self.vision_experts(
+vision_shared_output, vision_experts_output = self.vision_experts(
hidden_states=vision_hidden_states, router_logits=vision_router_logits
)
final_experts_hidden_states[visual_token_mask] = (
vision_experts_output.flatten()
)
if self.has_shared_experts:
-final_shared_ouput[visual_token_mask] = vision_shared_ouput.flatten()
+final_shared_output[visual_token_mask] = vision_shared_output.flatten()
-final_hidden_states = (final_shared_ouput, final_experts_hidden_states)
+final_hidden_states = (final_shared_output, final_experts_hidden_states)
else:
# only text modal input
text_router_logits, _ = self.text_experts_gate(

@@ -107,7 +107,7 @@ class Conv2dSubsampling(nn.Module):
)
self.subsampling = 4
-left_context = right_context = 3 # both exclude currect frame
+left_context = right_context = 3 # both exclude current frame
self.context = left_context + 1 + right_context # 7
def forward(

@@ -115,7 +115,7 @@ class EncoderLayerSANM(nn.Module):
hidden_states: torch.Tensor,
mask: torch.Tensor | None = None,
cache=None,
-mask_shfit_chunk=None,
+mask_shift_chunk=None,
mask_att_chunk_encoder=None,
):
residual = hidden_states
@@ -125,14 +125,14 @@ class EncoderLayerSANM(nn.Module):
hidden_states = residual + self.self_attn(
hidden_states,
mask,
-mask_shfit_chunk=mask_shfit_chunk,
+mask_shift_chunk=mask_shift_chunk,
mask_att_chunk_encoder=mask_att_chunk_encoder,
)
else:
hidden_states = self.self_attn(
hidden_states,
mask,
-mask_shfit_chunk=mask_shfit_chunk,
+mask_shift_chunk=mask_shift_chunk,
mask_att_chunk_encoder=mask_att_chunk_encoder,
)
@@ -140,7 +140,7 @@ class EncoderLayerSANM(nn.Module):
hidden_states = self.norm2(hidden_states)
hidden_states = residual + self.feed_forward(hidden_states)
-return hidden_states, mask, cache, mask_shfit_chunk, mask_att_chunk_encoder
+return hidden_states, mask, cache, mask_shift_chunk, mask_att_chunk_encoder
class MultiHeadedAttentionSANM(nn.Module):
@@ -183,13 +183,13 @@ class MultiHeadedAttentionSANM(nn.Module):
self,
inputs: torch.Tensor,
mask: torch.Tensor,
-mask_shfit_chunk: torch.Tensor = None,
+mask_shift_chunk: torch.Tensor = None,
):
b, t, d = inputs.size()
if mask is not None:
mask = torch.reshape(mask, (b, -1, 1))
-if mask_shfit_chunk is not None:
-mask = mask * mask_shfit_chunk
+if mask_shift_chunk is not None:
+mask = mask * mask_shift_chunk
inputs = inputs * mask
x = inputs.transpose(1, 2)
@@ -243,11 +243,11 @@ class MultiHeadedAttentionSANM(nn.Module):
self,
hidden_states: torch.Tensor,
mask: torch.Tensor,
-mask_shfit_chunk: torch.Tensor = None,
+mask_shift_chunk: torch.Tensor = None,
mask_att_chunk_encoder: torch.Tensor = None,
):
q_h, k_h, v_h, v = self.forward_qkv(hidden_states)
-fsmn_memory = self.forward_fsmn(v, mask, mask_shfit_chunk)
+fsmn_memory = self.forward_fsmn(v, mask, mask_shift_chunk)
q_h = q_h * self.d_k ** (-0.5)
scores = torch.matmul(q_h, k_h.transpose(-2, -1))
att_outs = self.forward_attention(v_h, scores, mask, mask_att_chunk_encoder)

@@ -646,7 +646,7 @@ class IsaacImageProcessor:
return_tensors: str | TensorType | None,
**kwargs: Unpack[IsaacImageProcessorKwargs],
) -> BatchFeature:
"""Preprocess images into format compatibile with vLLM input processing."""
"""Preprocess images into format compatible with vLLM input processing."""
all_pixel_values: list[torch.Tensor] = []
all_image_grids: list[torch.Tensor] = []

@@ -299,7 +299,7 @@ class KeyeVisionEmbeddings(nn.Module):
)
(
batch_size,
-squence_len,
+sequence_len,
channel,
height,
width,

@@ -238,7 +238,7 @@ class LongcatRouter(nn.Module):
self,
config: FlashConfig,
zero_expert_num: int,
-rounter_params_dtype: torch.dtype,
+router_params_dtype: torch.dtype,
prefix: str = "",
):
super().__init__()
@@ -252,12 +252,12 @@ class LongcatRouter(nn.Module):
config.hidden_size,
self.n_routed_experts,
bias=config.router_bias,
-params_dtype=rounter_params_dtype,
+params_dtype=router_params_dtype,
quant_config=None,
prefix=f"{prefix}.classifier",
)
self.e_score_correction_bias = nn.Parameter(
-torch.zeros((self.n_routed_experts), dtype=rounter_params_dtype)
+torch.zeros((self.n_routed_experts), dtype=router_params_dtype)
)
def forward(self, hidden_states):
@@ -281,14 +281,14 @@ class LongcatMoe(nn.Module):
super().__init__()
self.hidden_size = hidden_size
# Gate always runs at half / full precision for now.
-self.rounter_params_dtype = params_dtype
+self.router_params_dtype = params_dtype
if config.router_dtype == "float32":
-self.rounter_params_dtype = torch.float32
+self.router_params_dtype = torch.float32
self.router = LongcatRouter(
config=config,
zero_expert_num=config.zero_expert_num,
-rounter_params_dtype=self.rounter_params_dtype,
+router_params_dtype=self.router_params_dtype,
prefix=f"{prefix}.gate",
)
@@ -309,7 +309,7 @@ class LongcatMoe(nn.Module):
prefix=f"{prefix}.experts",
enable_eplb=enable_eplb,
routed_scaling_factor=config.routed_scaling_factor,
-router_logits_dtype=self.rounter_params_dtype,
+router_logits_dtype=self.router_params_dtype,
)
def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
@@ -329,7 +329,7 @@ class LongcatMoe(nn.Module):
hidden_states_padded = hidden_states
router_logits_full = self.router(
-hidden_states_padded.to(self.rounter_params_dtype)
+hidden_states_padded.to(self.router_params_dtype)
)
# ZeroExpertFusedMoE handles routing memoization and zero expert computation

@@ -1321,14 +1321,14 @@ def get_image_size(image: ImageInput) -> ImageSize:
raise ValueError(f"Unknown image type: {type(image)}")
-def exif_tranpose(
+def exif_transpose(
images: ImageInput | None,
) -> ImageInput | None:
if images is None:
return None
if images is not None and isinstance(images, (list, tuple)):
images = [
-exif_tranpose(img) if isinstance(img, Image) else img for img in images
+exif_transpose(img) if isinstance(img, Image) else img for img in images
]
elif images is not None and isinstance(images, Image):
images = ImageOps.exif_transpose(images)
@@ -1667,7 +1667,7 @@ class Molmo2ProcessorWrapper:
**kwargs: object,
) -> BatchFeature:
inputs = [text]
-images = exif_tranpose(images)
+images = exif_transpose(images)
if getattr(self.processor, "image_processor", None) is not None:
inputs.append(images)
if getattr(self.processor, "video_processor", None) is not None:
@@ -2352,7 +2352,7 @@ class Molmo2MultiModalProcessor(BaseMultiModalProcessor[Molmo2ProcessingInfo]):
def get_image_replacement_molmo2(item_idx: int) -> list[int]:
images = mm_items.get_items("image", ImageProcessorItems)
image = images.get(item_idx)
-image = exif_tranpose(image)
+image = exif_transpose(image)
resize_nrows, resize_cols = processor.get_base_grid_size(is_video=False)
if use_single_crop_col_tokens is not None:

@@ -349,7 +349,7 @@ class NemotronHMoEDecoderLayer(nn.Module):
super().__init__()
self.config = config
-# Get per-layer config for heterogeneous models if exsist
+# Get per-layer config for heterogeneous models if exists
get_layer_config = getattr(config, "get_nemotron_h_config_for_layer", None)
layer_config = get_layer_config(layer_idx) if get_layer_config else config
@@ -517,7 +517,7 @@ class NemotronHAttentionDecoderLayer(nn.Module):
) -> None:
super().__init__()
-# Get per-layer config for heterogeneous models if exsist
+# Get per-layer config for heterogeneous models if exists
get_layer_config = getattr(config, "get_nemotron_h_config_for_layer", None)
layer_config = get_layer_config(layer_idx) if get_layer_config else config

@@ -486,7 +486,7 @@ class SiglipVisionEmbeddings(nn.Module):
)
(
batch_size,
-squence_len,
+sequence_len,
channel,
height,
width,

@@ -689,19 +689,19 @@ class ConformerEncoder(TransformerEncoderBase):
default False.
ext_pw_out_channel: int, optional
the number of channel for CNN
-before depthwise_seperable_CNN.
+before depthwise_separable_CNN.
If 0 then use linear. default 0.
ext_pw_kernel_size: int, optional
-kernel size of N before depthwise_seperable_CNN.
+kernel size of N before depthwise_separable_CNN.
only work for ext_pw_out_channel > 0.
default 1
depthwise_seperable_out_channel: int, optional
the number of channel for
-depthwise_seperable_CNN.
+depthwise_separable_CNN.
default 256.
depthwise_multiplier: int, optional
the number of multiplier for
-depthwise_seperable_CNN.
+depthwise_separable_CNN.
default 1.
chunk_se: int, optional
0 for offline SE.
@@ -711,7 +711,7 @@ class ConformerEncoder(TransformerEncoderBase):
by only the current chunk.
default 0.
kernel_size: int, optional
-the number of kernels for depthwise_seperable_CNN.
+the number of kernels for depthwise_separable_CNN.
default 3.
activation: str, optional
FeedForward block activation.
@@ -721,7 +721,7 @@ class ConformerEncoder(TransformerEncoderBase):
activation function used in ConvModule part
of the conformer, default "relu".
conv_glu_type: str, optional
-activation used use glu in depthwise_seperable_CNN,
+activation used use glu in depthwise_separable_CNN,
default "sigmoid"
bias_in_glu: bool, optional
if set to True, use additive bias in the weight module

@@ -217,8 +217,8 @@ class GLUPointWiseConv(nn.Module):
return x
-class DepthWiseSeperableConv1d(nn.Module):
-"""DepthWiseSeperableConv1d module used in Convnet module
+class DepthWiseSeparableConv1d(nn.Module):
+"""DepthWiseSeparableConv1d module used in ConvNet module
for the conformer, for more details see:
https://arxiv.org/pdf/2005.08100v1.pdf
@@ -390,7 +390,7 @@ class ConvModule(nn.Module):
else:
padding = (kernel_size - 1) // 2
-self.dw_sep_conv_1d = DepthWiseSeperableConv1d(
+self.dw_sep_conv_1d = DepthWiseSeparableConv1d(
input_dim,
depthwise_seperable_out_channel,
kernel_size,

@@ -916,7 +916,7 @@ class Qwen2VLProcessingInfo(BaseProcessingInfo):
self, max_pixels: int | None = None
) -> ImageSize:
# NOTE: Simply processing a huge size with _get_vision_info might not give a
-# size that maximizes the number of featrues, i.e., the number of (merged)
+# size that maximizes the number of features, i.e., the number of (merged)
# patches. This is because the number of patches limits the allowed aspect
# ratios. For example, suppose the maximum number of patches is 1280. A square
# image cannot be broken down into 1280 patches, so feeding a giant square image

@@ -459,14 +459,14 @@ class Step3VLProcessor:
image_inputs = {}
text_inputs = self.tokenizer(text)
else:
-splitted_images_data = self._split_images(images)
+split_images_data = self._split_images(images)
pixel_values_lst = []
patch_pixel_values_lst = []
patch_newline_mask_lst = []
image_repl_str_lst = []
image_repl_ids_lst = []
num_patches = []
-for raw_img, img_patches, patch_newline_mask in splitted_images_data:
+for raw_img, img_patches, patch_newline_mask in split_images_data:
pixel_values_lst.extend(self._convert_images_to_pixel_values([raw_img]))
if len(img_patches) > 0:

@@ -353,7 +353,7 @@ class FusedMoEBlock(nn.Module):
if swiglu_limit not in (None, 0):
swiglu_limit = float(swiglu_limit)
assert swiglu_limit == 7.0, (
"Swiglu limit in fused moe block only suport 7.0 now."
"Swiglu limit in fused moe block only support 7.0 now."
)
activation = "swiglustep"
logger.debug(