fix logging msg for block manager (#3701)

This commit is contained in:
Simon Mo
2024-03-28 16:29:55 -07:00
committed by GitHub
parent c0935c96d3
commit 4716a32dd4
3 changed files with 5 additions and 4 deletions

View File

@@ -41,6 +41,8 @@ def _can_use_flash_attn(dtype: torch.dtype) -> bool:
     try:
        import flash_attn  # noqa: F401
    except ImportError:
-        logger.info("flash_attn is not found.")
+        logger.info(
+            "Cannot use FlashAttention because the package is not found. "
+            "Please install it for better performance.")
        return False
    return True

View File

@@ -230,13 +230,12 @@ class BlockSpaceManagerV1(BlockSpaceManager):
        self.watermark_blocks = int(watermark * num_gpu_blocks)
        if self.enable_caching:
-            logger.info("enable automatic prefix caching")
+            logger.info("Automatic prefix caching is enabled.")
            self.gpu_allocator = CachedBlockAllocator(Device.GPU, block_size,
                                                      num_gpu_blocks)
            self.cpu_allocator = CachedBlockAllocator(Device.CPU, block_size,
                                                      num_cpu_blocks)
        else:
-            logger.info("disable automatic prefix caching")
            self.gpu_allocator = UncachedBlockAllocator(
                Device.GPU, block_size, num_gpu_blocks)
            self.cpu_allocator = UncachedBlockAllocator(

View File

@@ -10,7 +10,6 @@ logger = logging.getLogger(__name__)
    try:
        from vllm.model_executor.parallel_utils.pynccl import (NCCLCommunicator,
                                                               ncclGetVersion)
-        logger.info(f"vLLM is using nccl=={ncclGetVersion()}")
    except Exception as e:
        # in non-NVIDIA environments, we can't import the nccl module
        # e.g. when running on machines with AMD GPUs
@@ -40,6 +39,7 @@ def init_process_group(world_size: int, local_rank: int, rank: int,
                       init_method: str) -> None:
    assert not is_initialized()
    global comm
+    logger.info(f"vLLM is using nccl=={ncclGetVersion()}")
    comm = NCCLCommunicator(init_method=init_method,
                            world_size=world_size,
                            local_rank=local_rank,