[Misc] Move print_*_once from utils to logger (#11298)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Signed-off-by: Maxime Fournioux <55544262+mfournioux@users.noreply.github.com>
Co-authored-by: Maxime Fournioux <55544262+mfournioux@users.noreply.github.com>
This commit is contained in:
Cyrus Leung
2025-01-09 12:48:12 +08:00
committed by GitHub
parent 730e9592e9
commit d848800e88
21 changed files with 129 additions and 72 deletions

View File

@@ -32,8 +32,7 @@ from vllm.transformers_utils.config import (
from vllm.transformers_utils.s3_utils import S3Model
from vllm.transformers_utils.utils import is_s3
from vllm.utils import (GiB_bytes, LayerBlockType, cuda_device_count_stateless,
-get_cpu_memory, print_warning_once, random_uuid,
-resolve_obj_by_qualname)
+get_cpu_memory, random_uuid, resolve_obj_by_qualname)
if TYPE_CHECKING:
from ray.util.placement_group import PlacementGroup
@@ -314,7 +313,7 @@ class ModelConfig:
sliding_window_len_min = get_min_sliding_window(
self.hf_text_config.sliding_window)
-print_warning_once(
+logger.warning_once(
f"{self.hf_text_config.model_type} has interleaved "
"attention, which is currently not supported by the "
"XFORMERS backend. Disabling sliding window and capping "
@@ -2758,7 +2757,7 @@ class CompilationConfig(BaseModel):
def model_post_init(self, __context: Any) -> None:
if not self.enable_reshape and self.enable_fusion:
-print_warning_once(
+logger.warning_once(
"Fusion enabled but reshape elimination disabled."
"RMSNorm + quant (fp8) fusion might not work")
@@ -3151,7 +3150,7 @@ class VllmConfig:
self.scheduler_config.chunked_prefill_enabled and \
self.model_config.dtype == torch.float32 and \
current_platform.get_device_capability() == (7, 5):
-print_warning_once(
+logger.warning_once(
"Turing devices tensor cores do not support float32 matmul. "
"To workaround this limitation, vLLM will set 'ieee' input "
"precision for chunked prefill triton kernels.")