[Bugfix] Fix EAGLE vocab embedding construction for Llama 70B (#19033)

Signed-off-by: Benjamin Chislett <benjamin.chislett@centml.ai>
This commit is contained in:
Benjamin Chislett
2025-06-05 22:10:08 -04:00
committed by GitHub
parent c8134bea15
commit 3465b87ef8
7 changed files with 70 additions and 47 deletions

View File

@@ -5,11 +5,11 @@ import copy
import itertools
import torch
import triton
from weight_shapes import WEIGHT_SHAPES
from vllm._custom_ops import cutlass_scaled_mm as vllm_scaled_mm
from vllm._custom_ops import scaled_fp8_quant as vllm_scaled_fp8_quant
from vllm.triton_utils import triton
@triton.testing.perf_report(