Signed-off-by: Hongxia Yang <hongxiay.yang@amd.com> Co-authored-by: Hongxia Yang <hongxiay.yang@amd.com>
64 lines
2.2 KiB
Python
64 lines
2.2 KiB
Python
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
|
"""Unit tests for QuarkConfig.maybe_update_config.

Fetches real HF configs (metadata only, no model weights) to verify
that dynamic_mxfp4_quant is only enabled for DeepSeek-V3-family models.

Run: pytest tests/quantization/test_quark_maybe_update_config.py -v
"""
|
|
|
|
import pytest
|
|
from transformers import AutoConfig
|
|
|
|
from vllm.model_executor.layers.quantization.quark.quark import QuarkConfig
|
|
|
|
|
|
def _make_quark_config() -> QuarkConfig:
    """Build a bare-bones QuarkConfig instance for the tests in this module."""
    # Empty quant config and KV-cache group; only the pack method is set.
    minimal_kwargs = {
        "quant_config": {},
        "kv_cache_group": [],
        "pack_method": "reorder",
    }
    return QuarkConfig(**minimal_kwargs)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Non-deepseek models must not flip dynamic_mxfp4_quant
# ---------------------------------------------------------------------------
|
|
@pytest.mark.parametrize("model_name", ["amd/MiniMax-M2.1-MXFP4"])
def test_non_deepseek_model_stays_false(model_name: str):
    """Check that dynamic_mxfp4_quant stays False for a non-deepseek_v3 model.

    The HF config for ``model_name`` is loaded via ``AutoConfig`` and handed
    to ``maybe_update_config``; afterwards the flag must still be ``False``.
    """
    pretrained_config = AutoConfig.from_pretrained(
        model_name,
        trust_remote_code=True,
    )
    quark_config = _make_quark_config()

    quark_config.maybe_update_config(model_name, hf_config=pretrained_config)

    flag = quark_config.dynamic_mxfp4_quant
    assert flag is False
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# DeepSeek-V3 family + fp4 must enable dynamic_mxfp4_quant
# ---------------------------------------------------------------------------
|
|
@pytest.mark.parametrize("model_name", ["amd/DeepSeek-R1-MXFP4-ASQ"])
def test_deepseek_family_fp4_enables_flag(model_name: str):
    """Check that dynamic_mxfp4_quant becomes True for the parametrized model.

    The HF config for ``model_name`` is loaded via ``AutoConfig`` and handed
    to ``maybe_update_config``; afterwards the flag must be ``True``.
    """
    pretrained_config = AutoConfig.from_pretrained(
        model_name,
        trust_remote_code=True,
    )
    quark_config = _make_quark_config()

    quark_config.maybe_update_config(model_name, hf_config=pretrained_config)

    flag = quark_config.dynamic_mxfp4_quant
    assert flag is True
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Missing hf_config → warn and stay False
# ---------------------------------------------------------------------------
|
|
def test_missing_hf_config_stays_false():
    """Check that omitting hf_config leaves dynamic_mxfp4_quant as False."""
    quark_config = _make_quark_config()

    # Only the model name is supplied; no hf_config keyword is passed.
    quark_config.maybe_update_config("some/model")

    flag = quark_config.dynamic_mxfp4_quant
    assert flag is False
|