[Bugfix] config.head_dim is now explicitly set to None (#18432)

Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com>
This commit is contained in:
Gregory Shtrasberg
2025-05-21 00:04:33 -04:00
committed by GitHub
parent 3b17ea26e4
commit 0c15c2e486
8 changed files with 27 additions and 18 deletions

View File

@@ -604,8 +604,9 @@ class MiniMaxText01DecoderLayer(nn.Module):
rope_theta = getattr(config, "rope_theta", 10000)
-head_dim = getattr(config, "head_dim",
-                   config.hidden_size // config.num_attention_heads)
+head_dim = getattr(config, "head_dim", None)
+if head_dim is None:
+    head_dim = config.hidden_size // config.num_attention_heads
if hasattr(config, "max_model_len") and isinstance(
config.max_model_len, int):
max_position_embeddings = min(config.max_position_embeddings,
@@ -861,8 +862,9 @@ class MiniMaxText01Model(nn.Module):
cache_shape=self.cache_shape)
rope_theta = getattr(config, "rope_theta", 10000)
-head_dim = getattr(config, "head_dim",
-                   config.hidden_size // config.num_attention_heads)
+head_dim = getattr(config, "head_dim", None)
+if head_dim is None:
+    head_dim = config.hidden_size // config.num_attention_heads
if hasattr(config, "max_model_len") and isinstance(
config.max_model_len, int):
max_position_embeddings = min(config.max_position_embeddings,