[KVConnector] Clean up redundant code in KV connectors (#34147)
Signed-off-by: Martin Hickey <martin.hickey@ie.ibm.com>
This commit is contained in:
@@ -6,7 +6,7 @@ from vllm.distributed.kv_transfer.kv_connector.v1.base import (
|
|||||||
SupportsHMA,
|
SupportsHMA,
|
||||||
supports_hma,
|
supports_hma,
|
||||||
)
|
)
|
||||||
from vllm.distributed.kv_transfer.kv_connector.v1.decode_bench_connector import ( # noqa E:501
|
from vllm.distributed.kv_transfer.kv_connector.v1.decode_bench_connector import ( # noqa: E501
|
||||||
DecodeBenchConnector,
|
DecodeBenchConnector,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -145,7 +145,6 @@ class ExampleConnector(KVConnectorBase_V1):
|
|||||||
num_pages * page_size, -1
|
num_pages * page_size, -1
|
||||||
)
|
)
|
||||||
dst_kv_cache_layer[slot_mapping, ...] = src_kv_cache
|
dst_kv_cache_layer[slot_mapping, ...] = src_kv_cache
|
||||||
dst_kv_cache_layer.reshape(dst_kv_cache_layer_shape)
|
|
||||||
else:
|
else:
|
||||||
num_pages = dst_kv_cache_layer_shape[1]
|
num_pages = dst_kv_cache_layer_shape[1]
|
||||||
page_size = dst_kv_cache_layer_shape[2]
|
page_size = dst_kv_cache_layer_shape[2]
|
||||||
@@ -153,18 +152,11 @@ class ExampleConnector(KVConnectorBase_V1):
|
|||||||
2, num_pages * page_size, -1
|
2, num_pages * page_size, -1
|
||||||
)
|
)
|
||||||
dst_kv_cache_layer[:, slot_mapping, ...] = src_kv_cache
|
dst_kv_cache_layer[:, slot_mapping, ...] = src_kv_cache
|
||||||
dst_kv_cache_layer.reshape(dst_kv_cache_layer_shape)
|
|
||||||
|
|
||||||
# Get the metadata
|
# Get the metadata
|
||||||
metadata: KVConnectorMetadata = self._get_connector_metadata()
|
metadata: KVConnectorMetadata = self._get_connector_metadata()
|
||||||
assert isinstance(metadata, ExampleConnectorMetadata)
|
assert isinstance(metadata, ExampleConnectorMetadata)
|
||||||
|
|
||||||
if metadata is None:
|
|
||||||
logger.warning(
|
|
||||||
"In connector.start_load_kv, but the connector metadata is None"
|
|
||||||
)
|
|
||||||
return
|
|
||||||
|
|
||||||
attn_metadata = forward_context.attn_metadata
|
attn_metadata = forward_context.attn_metadata
|
||||||
if attn_metadata is None:
|
if attn_metadata is None:
|
||||||
logger.warning("In connector.start_load_kv, but the attn_metadata is None")
|
logger.warning("In connector.start_load_kv, but the attn_metadata is None")
|
||||||
|
|||||||
@@ -36,7 +36,6 @@ except ImportError:
|
|||||||
)
|
)
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from vllm.config import VllmConfig
|
|
||||||
from vllm.distributed.kv_events import KVCacheEvent
|
from vllm.distributed.kv_events import KVCacheEvent
|
||||||
from vllm.distributed.kv_transfer.kv_connector.v1.metrics import (
|
from vllm.distributed.kv_transfer.kv_connector.v1.metrics import (
|
||||||
KVConnectorPromMetrics,
|
KVConnectorPromMetrics,
|
||||||
|
|||||||
@@ -85,7 +85,7 @@ class OffloadingConnectorStats(KVConnectorStats):
|
|||||||
for transfer_type, ops_list in self.data.items():
|
for transfer_type, ops_list in self.data.items():
|
||||||
assert isinstance(ops_list, list)
|
assert isinstance(ops_list, list)
|
||||||
total_bytes = 0
|
total_bytes = 0
|
||||||
total_time = 0
|
total_time = 0.0
|
||||||
for op in ops_list:
|
for op in ops_list:
|
||||||
assert isinstance(op, dict)
|
assert isinstance(op, dict)
|
||||||
total_bytes += op["op_size"]
|
total_bytes += op["op_size"]
|
||||||
|
|||||||
Reference in New Issue
Block a user