[ROCm][Bugfix] fix exception related to trust_remote_code for MiniMax-M2.1-MXFP4 (#37698)
Signed-off-by: Hongxia Yang <hongxiay.yang@amd.com> Co-authored-by: Hongxia Yang <hongxiay.yang@amd.com>
This commit is contained in:
63
tests/quantization/test_quark_maybe_update_config.py
Normal file
63
tests/quantization/test_quark_maybe_update_config.py
Normal file
@@ -0,0 +1,63 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
"""Unit tests for QuarkConfig.maybe_update_config.
|
||||
|
||||
Fetches real HF configs (metadata only, no model weights) to verify
|
||||
that dynamic_mxfp4_quant is only enabled for DeepSeek-V3-family models.
|
||||
|
||||
Run: pytest tests/quantization/test_quark_maybe_update_config.py -v
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from transformers import AutoConfig
|
||||
|
||||
from vllm.model_executor.layers.quantization.quark.quark import QuarkConfig
|
||||
|
||||
|
||||
def _make_quark_config() -> QuarkConfig:
|
||||
"""Create a minimal QuarkConfig for testing."""
|
||||
return QuarkConfig(quant_config={}, kv_cache_group=[], pack_method="reorder")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Non-deepseek models must not flip dynamic_mxfp4_quant
|
||||
# ---------------------------------------------------------------------------
|
||||
@pytest.mark.parametrize(
|
||||
"model_name",
|
||||
["amd/MiniMax-M2.1-MXFP4"],
|
||||
)
|
||||
def test_non_deepseek_model_stays_false(model_name: str):
|
||||
"""Non-deepseek_v3 models must not enable dynamic_mxfp4_quant."""
|
||||
hf_config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)
|
||||
qcfg = _make_quark_config()
|
||||
|
||||
qcfg.maybe_update_config(model_name, hf_config=hf_config)
|
||||
|
||||
assert qcfg.dynamic_mxfp4_quant is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# DeepSeek-V3 family + fp4 must enable dynamic_mxfp4_quant
|
||||
# ---------------------------------------------------------------------------
|
||||
@pytest.mark.parametrize(
|
||||
"model_name",
|
||||
["amd/DeepSeek-R1-MXFP4-ASQ"],
|
||||
)
|
||||
def test_deepseek_family_fp4_enables_flag(model_name: str):
|
||||
hf_config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)
|
||||
qcfg = _make_quark_config()
|
||||
|
||||
qcfg.maybe_update_config(model_name, hf_config=hf_config)
|
||||
|
||||
assert qcfg.dynamic_mxfp4_quant is True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Missing hf_config → warn and stay False
|
||||
# ---------------------------------------------------------------------------
|
||||
def test_missing_hf_config_stays_false():
|
||||
qcfg = _make_quark_config()
|
||||
|
||||
qcfg.maybe_update_config("some/model")
|
||||
|
||||
assert qcfg.dynamic_mxfp4_quant is False
|
||||
Reference in New Issue
Block a user