[Misc] Move print_*_once from utils to logger (#11298)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk> Signed-off-by: Maxime Fournioux <55544262+mfournioux@users.noreply.github.com> Co-authored-by: Maxime Fournioux <55544262+mfournioux@users.noreply.github.com>
This commit is contained in:
@@ -28,7 +28,6 @@ from vllm.model_executor.parameter import (BlockQuantScaleParameter,
|
||||
PerTensorScaleParameter)
|
||||
from vllm.model_executor.utils import set_weight_attrs
|
||||
from vllm.platforms import current_platform
|
||||
from vllm.utils import print_warning_once
|
||||
|
||||
ACTIVATION_SCHEMES = ["static", "dynamic"]
|
||||
|
||||
@@ -539,10 +538,10 @@ class Fp8MoEMethod(FusedMoEMethodBase):
|
||||
"activation scales are None.")
|
||||
if (not all_close_1d(layer.w13_input_scale)
|
||||
or not all_close_1d(layer.w2_input_scale)):
|
||||
print_warning_once(
|
||||
logger.warning_once(
|
||||
"Found input_scales that are not equal for "
|
||||
"fp8 MoE layer. Using the maximum across experts "
|
||||
"for each layer. ")
|
||||
"for each layer.")
|
||||
layer.w13_input_scale = torch.nn.Parameter(
|
||||
layer.w13_input_scale.max(), requires_grad=False)
|
||||
layer.w2_input_scale = torch.nn.Parameter(
|
||||
|
||||
Reference in New Issue
Block a user