fix logging msg for block manager (#3701)
@@ -41,6 +41,8 @@ def _can_use_flash_attn(dtype: torch.dtype) -> bool:
     try:
         import flash_attn  # noqa: F401
     except ImportError:
-        logger.info("flash_attn is not found.")
+        logger.info(
+            "Cannot use FlashAttention because the package is not found. "
+            "Please install it for better performance.")
         return False
     return True

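For context, here is how the helper reads after this hunk — a minimal sketch that elides the surrounding dtype checks and assumes the file's module-level logger:

import logging

import torch

logger = logging.getLogger(__name__)


def _can_use_flash_attn(dtype: torch.dtype) -> bool:
    # Dtype checks from the original function are elided; only the
    # import probe touched by the hunk is shown.
    try:
        import flash_attn  # noqa: F401
    except ImportError:
        # The new message names the feature and tells the user what to
        # do, instead of the bare "flash_attn is not found."
        logger.info(
            "Cannot use FlashAttention because the package is not found. "
            "Please install it for better performance.")
        return False
    return True
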
@@ -230,13 +230,12 @@ class BlockSpaceManagerV1(BlockSpaceManager):
         self.watermark_blocks = int(watermark * num_gpu_blocks)
 
         if self.enable_caching:
-            logger.info("enable automatic prefix caching")
+            logger.info("Automatic prefix caching is enabled.")
             self.gpu_allocator = CachedBlockAllocator(Device.GPU, block_size,
                                                       num_gpu_blocks)
             self.cpu_allocator = CachedBlockAllocator(Device.CPU, block_size,
                                                       num_cpu_blocks)
         else:
-            logger.info("disable automatic prefix caching")
             self.gpu_allocator = UncachedBlockAllocator(
                 Device.GPU, block_size, num_gpu_blocks)
             self.cpu_allocator = UncachedBlockAllocator(

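The net effect in BlockSpaceManagerV1.__init__ is that only the enabled case is announced and the default (uncached) path is silent. A sketch of the resulting branch, assuming the allocator classes and Device enum are imported as elsewhere in the file:

if self.enable_caching:
    # Prefix caching is opt-in, so turning it on is worth a log line.
    logger.info("Automatic prefix caching is enabled.")
    self.gpu_allocator = CachedBlockAllocator(Device.GPU, block_size,
                                              num_gpu_blocks)
    self.cpu_allocator = CachedBlockAllocator(Device.CPU, block_size,
                                              num_cpu_blocks)
else:
    # The default path no longer logs; caching being disabled is not
    # an event worth reporting.
    self.gpu_allocator = UncachedBlockAllocator(
        Device.GPU, block_size, num_gpu_blocks)
    self.cpu_allocator = UncachedBlockAllocator(
        Device.CPU, block_size, num_cpu_blocks)
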
@@ -10,7 +10,6 @@ logger = logging.getLogger(__name__)
 try:
     from vllm.model_executor.parallel_utils.pynccl import (NCCLCommunicator,
                                                            ncclGetVersion)
-    logger.info(f"vLLM is using nccl=={ncclGetVersion()}")
 except Exception as e:
     # in non-NVIDIA environments, we can't import the nccl module
     # e.g. when running on machines with AMD GPUs

@@ -40,6 +39,7 @@ def init_process_group(world_size: int, local_rank: int, rank: int,
                        init_method: str) -> None:
     assert not is_initialized()
     global comm
+    logger.info(f"vLLM is using nccl=={ncclGetVersion()}")
     comm = NCCLCommunicator(init_method=init_method,
                             world_size=world_size,
                             local_rank=local_rank,

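Taken together, the last two hunks move the version log from import time to initialization time: importing the module stays silent, and the message is emitted only when NCCL is actually set up. A minimal sketch of the relocated call site — is_initialized and the trailing NCCLCommunicator arguments are stand-ins inferred from the hunk context:

import logging

from vllm.model_executor.parallel_utils.pynccl import (NCCLCommunicator,
                                                       ncclGetVersion)

logger = logging.getLogger(__name__)
comm = None


def is_initialized() -> bool:
    # Hypothetical stand-in for the module's helper of the same name.
    return comm is not None


def init_process_group(world_size: int, local_rank: int, rank: int,
                       init_method: str) -> None:
    assert not is_initialized()
    global comm
    # The version log now fires here, i.e. only when NCCL is actually
    # initialized, instead of as a side effect of importing the module.
    logger.info(f"vLLM is using nccl=={ncclGetVersion()}")
    comm = NCCLCommunicator(init_method=init_method,
                            world_size=world_size,
                            local_rank=local_rank,
                            rank=rank)  # trailing args assumed from the signature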