[Llama4,CI] Bring back Llama-4 bug fixes, and also fix Maverick tests (#35033)

Signed-off-by: Eldar Kurtic <you@example.com>
Co-authored-by: Eldar Kurtic <you@example.com>
This commit is contained in:
Eldar Kurtić
2026-02-23 15:04:34 +01:00
committed by GitHub
parent 8435b2e049
commit 1e8438a89a
2 changed files with 31 additions and 70 deletions

View File

@@ -305,10 +305,10 @@ def create_text_model_weights(text_config: dict[str, Any]) -> dict[str, torch.Te
 # Self-attention weights (separate q, k, v projections)
 weights[f"{layer_prefix}.self_attn.q_proj.weight"] = torch.randn(
-    hidden_size, num_attention_heads * head_dim, dtype=torch.bfloat16
+    num_attention_heads * head_dim, hidden_size, dtype=torch.bfloat16
 )
 weights[f"{layer_prefix}.self_attn.k_proj.weight"] = torch.randn(
-    hidden_size, num_key_value_heads * head_dim, dtype=torch.bfloat16
+    num_key_value_heads * head_dim, hidden_size, dtype=torch.bfloat16
 )
 weights[f"{layer_prefix}.self_attn.v_proj.weight"] = torch.randn(
     num_key_value_heads * head_dim, hidden_size, dtype=torch.bfloat16