[V1][Spec Decode] Eagle Model loading (#16035)

Signed-off-by: LiuXiaoxuanPKU <lilyliupku@gmail.com>
This commit is contained in:
Lily Liu
2025-04-10 11:21:48 -07:00
committed by GitHub
parent 9665313c39
commit e8224f3dca
9 changed files with 251 additions and 28 deletions

View File

@@ -76,6 +76,7 @@ def main():
max_num_seqs=args.max_num_seqs,
gpu_memory_utilization=0.8,
speculative_config={
"method": "eagle",
"model": eagle_dir,
"num_speculative_tokens": args.num_spec_tokens,
"draft_tensor_parallel_size": args.draft_tp,