[Core] Refactor _prepare_model_input_tensors - take 2 (#6164)
This commit is contained in:
@@ -3,7 +3,7 @@ from typing import List, Tuple, Type
|
||||
|
||||
import torch
|
||||
|
||||
from vllm.attention import AttentionMetadata
|
||||
from vllm.attention import AttentionMetadata, AttentionMetadataBuilder
|
||||
from vllm.attention.backends.abstract import AttentionBackend
|
||||
from vllm.model_executor import SamplingMetadata
|
||||
from vllm.model_executor.pooling_metadata import PoolingMetadata
|
||||
@@ -26,6 +26,10 @@ class MockAttentionBackend(AttentionBackend):
|
||||
def get_metadata_cls() -> Type["AttentionMetadata"]:
|
||||
return AttentionMetadata
|
||||
|
||||
@staticmethod
def get_builder_cls() -> Type["AttentionMetadataBuilder"]:
    """Return the attention-metadata builder class for this mock backend.

    Returns:
        The ``AttentionMetadataBuilder`` class object itself (not an
        instance), as the return annotation declares.
    """
    # Hand the class back rather than raising it, mirroring how
    # get_metadata_cls returns AttentionMetadata.
    return AttentionMetadataBuilder
|
||||
|
||||
@staticmethod
|
||||
def get_kv_cache_shape(
|
||||
num_blocks: int,
|
||||
|
||||
Reference in New Issue
Block a user