diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/__init__.py b/vllm/distributed/kv_transfer/kv_connector/v1/__init__.py index 0e16bc5cc..47329207f 100644 --- a/vllm/distributed/kv_transfer/kv_connector/v1/__init__.py +++ b/vllm/distributed/kv_transfer/kv_connector/v1/__init__.py @@ -6,7 +6,7 @@ from vllm.distributed.kv_transfer.kv_connector.v1.base import ( SupportsHMA, supports_hma, ) -from vllm.distributed.kv_transfer.kv_connector.v1.decode_bench_connector import ( # noqa E:501 +from vllm.distributed.kv_transfer.kv_connector.v1.decode_bench_connector import ( # noqa: E501 DecodeBenchConnector, ) diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/example_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/example_connector.py index 19d62fecd..d4a99cf09 100644 --- a/vllm/distributed/kv_transfer/kv_connector/v1/example_connector.py +++ b/vllm/distributed/kv_transfer/kv_connector/v1/example_connector.py @@ -145,7 +145,6 @@ class ExampleConnector(KVConnectorBase_V1): num_pages * page_size, -1 ) dst_kv_cache_layer[slot_mapping, ...] = src_kv_cache - dst_kv_cache_layer.reshape(dst_kv_cache_layer_shape) else: num_pages = dst_kv_cache_layer_shape[1] page_size = dst_kv_cache_layer_shape[2] @@ -153,18 +152,11 @@ class ExampleConnector(KVConnectorBase_V1): 2, num_pages * page_size, -1 ) dst_kv_cache_layer[:, slot_mapping, ...] = src_kv_cache - dst_kv_cache_layer.reshape(dst_kv_cache_layer_shape) # Get the metadata metadata: KVConnectorMetadata = self._get_connector_metadata() assert isinstance(metadata, ExampleConnectorMetadata) - if metadata is None: - logger.warning( - "In connector.start_load_kv, but the connector metadata is None" - ) - return - attn_metadata = forward_context.attn_metadata if attn_metadata is None: logger.warning("In connector.start_load_kv, but the attn_metadata is None") diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/lmcache_mp_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/lmcache_mp_connector.py index 0379011e7..fc31836aa 100644 --- a/vllm/distributed/kv_transfer/kv_connector/v1/lmcache_mp_connector.py +++ b/vllm/distributed/kv_transfer/kv_connector/v1/lmcache_mp_connector.py @@ -36,7 +36,6 @@ except ImportError: ) if TYPE_CHECKING: - from vllm.config import VllmConfig from vllm.distributed.kv_events import KVCacheEvent from vllm.distributed.kv_transfer.kv_connector.v1.metrics import ( KVConnectorPromMetrics, diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/offloading_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/offloading_connector.py index 73922a6fb..fd99c1a74 100644 --- a/vllm/distributed/kv_transfer/kv_connector/v1/offloading_connector.py +++ b/vllm/distributed/kv_transfer/kv_connector/v1/offloading_connector.py @@ -85,7 +85,7 @@ class OffloadingConnectorStats(KVConnectorStats): for transfer_type, ops_list in self.data.items(): assert isinstance(ops_list, list) total_bytes = 0 - total_time = 0 + total_time = 0.0 for op in ops_list: assert isinstance(op, dict) total_bytes += op["op_size"]