Re-enable the 80 char line width limit (#3305)
This commit is contained in:
@@ -119,7 +119,8 @@ class DeepseekMoE(nn.Module):
|
||||
linear_method=None)
|
||||
|
||||
if config.n_shared_experts is not None:
|
||||
-intermediate_size = config.moe_intermediate_size * config.n_shared_experts
+intermediate_size = (config.moe_intermediate_size *
+config.n_shared_experts)
|
||||
self.shared_experts = DeepseekMLP(
|
||||
hidden_size=config.hidden_size,
|
||||
intermediate_size=intermediate_size,
|
||||
@@ -273,8 +274,9 @@ class DeepseekDecoderLayer(nn.Module):
|
||||
max_position_embeddings=max_position_embeddings,
|
||||
linear_method=linear_method,
|
||||
)
|
||||
-if (config.n_routed_experts is not None and \
-layer_idx >= config.first_k_dense_replace and layer_idx % config.moe_layer_freq == 0):
+if (config.n_routed_experts is not None
+and layer_idx >= config.first_k_dense_replace
+and layer_idx % config.moe_layer_freq == 0):
|
||||
self.mlp = DeepseekMoE(config=config, linear_method=linear_method)
|
||||
else:
|
||||
self.mlp = DeepseekMLP(
|
||||
|
||||
Reference in New Issue
Block a user