[Model] Support Llama4 in vLLM (#16104)

This commit is contained in:
Lu Fang
2025-04-05 21:01:00 -07:00
committed by GitHub
parent 63375f0cdb
commit c575232395
35 changed files with 2369 additions and 142 deletions

View File

@@ -553,6 +553,9 @@ def main(args: argparse.Namespace):
intermediate_size = config.moe_intermediate_size
shard_intermediate_size = 2 * intermediate_size // args.tp_size
else:
if not hasattr(config, "hidden_size"):
# Support for llama4
config = config.text_config
# Default: Mixtral.
E = config.num_local_experts
topk = config.num_experts_per_tok