[Core] Refactor _prepare_model_input_tensors - take 2 (#6164)

This commit is contained in:
Cody Yu
2024-07-17 09:37:16 -07:00
committed by GitHub
parent a9a2e74d21
commit 2fa4623d9e
12 changed files with 1050 additions and 470 deletions

View File

@@ -3,7 +3,7 @@ from typing import List, Tuple, Type
import torch
from vllm.attention import AttentionMetadata
from vllm.attention import AttentionMetadata, AttentionMetadataBuilder
from vllm.attention.backends.abstract import AttentionBackend
from vllm.model_executor import SamplingMetadata
from vllm.model_executor.pooling_metadata import PoolingMetadata
@@ -26,6 +26,10 @@ class MockAttentionBackend(AttentionBackend):
def get_metadata_cls() -> Type["AttentionMetadata"]:
return AttentionMetadata
@staticmethod
def get_builder_cls() -> Type["AttentionMetadataBuilder"]:
    """Return the attention-metadata builder class used by this mock backend.

    Returns:
        The ``AttentionMetadataBuilder`` class object itself (not an
        instance), as promised by the return annotation.
    """
    # This must be `return`, not `raise`: the annotation declares that the
    # class is handed back to the caller, and `raise` on a class that does
    # not derive from BaseException fails with
    # "TypeError: exceptions must derive from BaseException".
    return AttentionMetadataBuilder
@staticmethod
def get_kv_cache_shape(
num_blocks: int,