[Model] Add MiMo-V2-Flash support (#30836)

Signed-off-by: Abatom <abzhonghua@gmail.com>
Signed-off-by: Jumiar <liuanqim10@126.com>
Signed-off-by: Zyann7 <zyann7@outlook.com>
Co-authored-by: Jumiar <liuanqim10@126.com>
Co-authored-by: Zyann7 <zyann7@outlook.com>
Co-authored-by: Jee Jee Li <pandaleefree@gmail.com>
This commit is contained in:
Zhonghua Deng
2025-12-20 01:17:03 +08:00
committed by GitHub
parent 268a972c62
commit 969bbc7c61
8 changed files with 789 additions and 13 deletions

View File

@@ -1252,6 +1252,14 @@ def validate_fp8_block_shape(
"""Validate block quantization shapes for tensor parallelism."""
from vllm.distributed import get_tensor_model_parallel_world_size
if getattr(layer, "allow_fp8_block_shape_mismatch", False):
logger.debug(
"Skipping FP8 block shape validation for layer %s due to detected"
" mismatch allowance.",
getattr(layer, "prefix", "<unknown>"),
)
return
tp_size = getattr(layer, "tp_size", get_tensor_model_parallel_world_size())
block_n, block_k = block_size[0], block_size[1]