Re-enable the 80 char line width limit (#3305)

This commit is contained in:
Zhuohan Li
2024-03-10 19:49:14 -07:00
committed by GitHub
parent 4b59f00e91
commit 2f8844ba08
67 changed files with 557 additions and 528 deletions

View File

@@ -119,7 +119,8 @@ class DeepseekMoE(nn.Module):
linear_method=None)
if config.n_shared_experts is not None:
-intermediate_size = config.moe_intermediate_size * config.n_shared_experts
+intermediate_size = (config.moe_intermediate_size *
+config.n_shared_experts)
self.shared_experts = DeepseekMLP(
hidden_size=config.hidden_size,
intermediate_size=intermediate_size,
@@ -273,8 +274,9 @@ class DeepseekDecoderLayer(nn.Module):
max_position_embeddings=max_position_embeddings,
linear_method=linear_method,
)
-if (config.n_routed_experts is not None and \
-layer_idx >= config.first_k_dense_replace and layer_idx % config.moe_layer_freq == 0):
+if (config.n_routed_experts is not None
+and layer_idx >= config.first_k_dense_replace
+and layer_idx % config.moe_layer_freq == 0):
self.mlp = DeepseekMoE(config=config, linear_method=linear_method)
else:
self.mlp = DeepseekMLP(