[UX] Suppress gloo log spam (#29250)

Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
Michael Goin
2025-11-25 20:19:35 -05:00
committed by GitHub
parent 56531b79cc
commit 8d6a89dffd
3 changed files with 63 additions and 26 deletions

View File

@@ -51,6 +51,7 @@ from vllm.distributed.utils import StatelessProcessGroup
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.utils.import_utils import resolve_obj_by_qualname from vllm.utils.import_utils import resolve_obj_by_qualname
from vllm.utils.network_utils import get_distributed_init_method from vllm.utils.network_utils import get_distributed_init_method
from vllm.utils.system_utils import suppress_stdout
from vllm.utils.torch_utils import ( from vllm.utils.torch_utils import (
direct_register_custom_op, direct_register_custom_op,
supports_custom_op, supports_custom_op,
@@ -329,7 +330,8 @@ class GroupCoordinator:
) )
# a group with `gloo` backend, to allow direct coordination between # a group with `gloo` backend, to allow direct coordination between
# processes through the CPU. # processes through the CPU.
cpu_group = torch.distributed.new_group(ranks, backend="gloo") with suppress_stdout():
cpu_group = torch.distributed.new_group(ranks, backend="gloo")
if self.rank in ranks: if self.rank in ranks:
self.ranks = ranks self.ranks = ranks
self.world_size = len(ranks) self.world_size = len(ranks)

View File

@@ -30,6 +30,7 @@ from torch.distributed.rendezvous import rendezvous
import vllm.envs as envs import vllm.envs as envs
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.utils.network_utils import get_tcp_uri from vllm.utils.network_utils import get_tcp_uri
from vllm.utils.system_utils import suppress_stdout
from vllm.utils.torch_utils import is_torch_equal_or_newer from vllm.utils.torch_utils import is_torch_equal_or_newer
logger = init_logger(__name__) logger = init_logger(__name__)
@@ -427,33 +428,34 @@ def init_gloo_process_group(
Stateless init ProcessGroup with gloo backend compatible with Stateless init ProcessGroup with gloo backend compatible with
different torch versions. different torch versions.
""" """
if is_torch_equal_or_newer("2.6"): with suppress_stdout():
pg = ProcessGroup( if is_torch_equal_or_newer("2.6"):
prefix_store, pg = ProcessGroup(
group_rank, prefix_store,
group_size, group_rank,
) group_size,
else: )
options = ProcessGroup.Options(backend="gloo") else:
pg = ProcessGroup( options = ProcessGroup.Options(backend="gloo")
prefix_store, pg = ProcessGroup(
group_rank, prefix_store,
group_size, group_rank,
options, group_size,
) options,
from torch.distributed.distributed_c10d import ProcessGroupGloo )
from torch.distributed.distributed_c10d import ProcessGroupGloo
backend_class = ProcessGroupGloo( backend_class = ProcessGroupGloo(
prefix_store, group_rank, group_size, timeout=timeout prefix_store, group_rank, group_size, timeout=timeout
) )
backend_type = ProcessGroup.BackendType.GLOO backend_type = ProcessGroup.BackendType.GLOO
device = torch.device("cpu") device = torch.device("cpu")
if is_torch_equal_or_newer("2.6"): if is_torch_equal_or_newer("2.6"):
# _set_default_backend is supported in torch >= 2.6 # _set_default_backend is supported in torch >= 2.6
pg._set_default_backend(backend_type) pg._set_default_backend(backend_type)
backend_class._set_sequence_number_for_group() backend_class._set_sequence_number_for_group()
pg._register_backend(device, backend_type, backend_class) pg._register_backend(device, backend_type, backend_class)
return pg return pg

View File

@@ -56,6 +56,39 @@ def set_env_var(key: str, value: str) -> Iterator[None]:
os.environ[key] = old os.environ[key] = old
@contextlib.contextmanager
def suppress_stdout():
    """
    Suppress stdout from C libraries at the file descriptor level.

    Only suppresses stdout, not stderr, to preserve error messages.
    Suppression is disabled when VLLM_LOGGING_LEVEL is set to DEBUG.

    Suppression is best-effort: if ``sys.stdout`` has no usable file
    descriptor (it is ``None``, closed, or an in-memory stream), the body
    runs with stdout untouched instead of raising.

    Anything Python itself buffers on ``sys.stdout`` while the context is
    active is flushed to the null device on exit, i.e. it is discarded too.

    Example:
        with suppress_stdout():
            # C library calls that would normally print to stdout
            torch.distributed.new_group(ranks, backend="gloo")
    """
    # Don't suppress if logging level is DEBUG
    if envs.VLLM_LOGGING_LEVEL == "DEBUG":
        yield
        return

    try:
        stdout_fd = sys.stdout.fileno()
    except (AttributeError, OSError, ValueError):
        # AttributeError: sys.stdout is None or lacks fileno().
        # OSError/ValueError: io.UnsupportedOperation (subclass of both) from
        # in-memory streams, or ValueError from a closed file.
        # Silencing log noise is cosmetic, so never fail the caller over it.
        yield
        return

    # ExitStack runs every registered callback (LIFO) even if an earlier one
    # raises, so each descriptor is released and stdout is restored no matter
    # which step fails.
    with contextlib.ExitStack() as stack:
        stdout_dup = os.dup(stdout_fd)
        stack.callback(os.close, stdout_dup)

        devnull_fd = os.open(os.devnull, os.O_WRONLY)
        stack.callback(os.close, devnull_fd)

        # Push out anything already buffered before the descriptor is swapped,
        # so pre-existing output still reaches the real stdout.
        sys.stdout.flush()
        os.dup2(devnull_fd, stdout_fd)

        # Registered after the swap so they run first on exit: flush while
        # stdout still points at devnull, then restore the original target.
        stack.callback(os.dup2, stdout_dup, stdout_fd)
        # Look up sys.stdout at exit time in case it was rebound meanwhile.
        stack.callback(lambda: sys.stdout.flush())

        yield
# File path utilities # File path utilities