[Model] Support Llama4 in vLLM (#16104)
This commit is contained in:
@@ -553,6 +553,9 @@ def main(args: argparse.Namespace):
|
||||
intermediate_size = config.moe_intermediate_size
|
||||
shard_intermediate_size = 2 * intermediate_size // args.tp_size
|
||||
else:
|
||||
if not hasattr(config, "hidden_size"):
|
||||
# Support for llama4
|
||||
config = config.text_config
|
||||
# Default: Mixtral.
|
||||
E = config.num_local_experts
|
||||
topk = config.num_experts_per_tok
|
||||
|
||||
Reference in New Issue
Block a user