[MoE Refactor][4/N] Marlin Fp8 Mk (#31036)

This commit is contained in:
Robert Shaw
2025-12-21 12:37:42 -05:00
committed by GitHub
parent 93cabc417c
commit b471092d3a
4 changed files with 85 additions and 63 deletions

View File

@@ -15,6 +15,7 @@ from vllm.model_executor.layers.quantization.fp8 import (
Fp8Config,
Fp8KVCacheMethod,
Fp8LinearMethod,
Fp8MoeBackend,
Fp8MoEMethod,
)
from vllm.model_executor.model_loader.weight_utils import default_weight_loader
@@ -324,7 +325,10 @@ def test_fp8_reloading(
weight_loader=default_weight_loader,
)
# Fp8LinearMethod reads its Marlin setting from `use_marlin`, while
# Fp8MoEMethod reads it from `fp8_backend`, so both are set below.
method.use_marlin = use_marlin
method.fp8_backend = Fp8MoeBackend.MARLIN if use_marlin else None
# capture weights format during loading
original_metadata = [