[Minor] Remove unnecessary error message (#27115)

Signed-off-by: Zhuohan Li <zhuohan123@gmail.com>
This commit is contained in:
Zhuohan Li
2025-10-17 13:02:12 -07:00
committed by GitHub
parent 950cf9e58e
commit d29483b58a
2 changed files with 19 additions and 55 deletions

View File

@@ -34,7 +34,6 @@ from vllm.model_executor.parameter import (
)
from vllm.model_executor.utils import set_weight_attrs
from vllm.platforms import current_platform
from vllm.utils import GiB_bytes
logger = init_logger(__name__)
@@ -211,33 +210,17 @@ class UnquantizedLinearMethod(LinearMethodBase):
# The weights are not quantized, and they are not sharded.
# The amount of memory allocated for the weights is
# sum(output_partition_sizes) * input_size_per_partition.
try:
weight_loader = extra_weight_attrs.pop("weight_loader")
weight = ModelWeightParameter(
data=torch.empty(
sum(output_partition_sizes),
input_size_per_partition,
dtype=params_dtype,
),
input_dim=1,
output_dim=0,
weight_loader=weight_loader,
)
except torch.cuda.OutOfMemoryError as e:
logger.error("Failed to create unquantized linear weights: %s", e)
if torch.cuda.is_available():
logger.debug("CUDA device: %s", torch.cuda.current_device())
logger.debug(
"Allocated: %.2f GiB", torch.cuda.memory_allocated() / GiB_bytes
)
logger.debug(
"Reserved: %.2f GiB", torch.cuda.memory_reserved() / GiB_bytes
)
raise RuntimeError(
"Failed to create unquantized linear weights. "
"This may be caused by insufficient memory to allocate "
"the weight."
) from e
weight_loader = extra_weight_attrs.pop("weight_loader")
weight = ModelWeightParameter(
data=torch.empty(
sum(output_partition_sizes),
input_size_per_partition,
dtype=params_dtype,
),
input_dim=1,
output_dim=0,
weight_loader=weight_loader,
)
layer.register_parameter("weight", weight)
set_weight_attrs(weight, extra_weight_attrs)