[Kernel] Expand MoE weight loading + Add Fused Marlin MoE Kernel (#7527)
Co-authored-by: ElizaWszola <eliza@neuralmagic.com>
This commit is contained in:
@@ -920,7 +920,7 @@ class JambaForCausalLM(nn.Module, HasInnerState):
|
||||
weight_loader = param.weight_loader
|
||||
weight_loader(param,
|
||||
loaded_weight,
|
||||
weight_name,
|
||||
name,
|
||||
shard_id=shard_id,
|
||||
expert_id=expert_id)
|
||||
break
|
||||
|
||||
Reference in New Issue
Block a user