[Misc] Support FP8 MoE for compressed-tensors (#8588)

This commit is contained in:
Michael Goin
2024-09-25 12:43:36 -04:00
committed by GitHub
parent 64840dfae4
commit 873edda6cf
5 changed files with 226 additions and 8 deletions

View File

@@ -321,13 +321,13 @@ class PhiMoEAttention(nn.Module):
self.total_num_heads,
self.total_num_kv_heads,
bias=True,
- quant_config=None,
+ quant_config=quant_config,
)
self.o_proj = RowParallelLinear(
self.total_num_heads * self.head_dim,
hidden_size,
bias=True,
- quant_config=None,
+ quant_config=quant_config,
)
self.rotary_emb = get_rope(
self.head_dim,