[Core] Use CpuGpuBuffer for block table tensors (#24795)
Signed-off-by: Nick Hill <nhill@redhat.com>
This commit is contained in:
@@ -15,6 +15,7 @@ from vllm.utils import is_pin_memory_available, make_tensor_with_pad
|
||||
from vllm.v1.pool.metadata import PoolingMetadata
|
||||
from vllm.v1.sample.logits_processor import LogitsProcessors
|
||||
from vllm.v1.sample.metadata import SamplingMetadata
|
||||
+ from vllm.v1.utils import CpuGpuBuffer
|
||||
from vllm.v1.worker.block_table import BlockTable, MultiGroupBlockTable
|
||||
from vllm.v1.worker.gpu_input_batch import CachedRequestState, InputBatch
|
||||
|
||||
@@ -45,7 +46,7 @@ def _compare_objs(obj1,
|
||||
|
||||
is_same = False
|
||||
if isinstance(a, torch.Tensor):
|
||||
- if (a.numel() == 0 or b.numel() == 0):
|
||||
+ if a.numel() == 0 or b.numel() == 0:
|
||||
is_same = (a.numel() == 0 and b.numel() == 0)
|
||||
elif torch.allclose(a, b):
|
||||
is_same = True
|
||||
@@ -61,6 +62,8 @@ def _compare_objs(obj1,
|
||||
is_same = True # if we make it here must be same
|
||||
elif a == b:
|
||||
is_same = True
|
||||
+ elif isinstance(a, CpuGpuBuffer):
|
||||
+ is_same = np.allclose(a.np, b.np) and torch.allclose(a.gpu, b.gpu)
|
||||
assert is_same, f"Attribute {attr_name} is different"\
|
||||
f" in {obj1} and {obj2}: {a} != {b}"
|
||||
|
||||
|
||||
Reference in New Issue
Block a user