[CI] Revert PRs 34818 and 33600 (#34979)
This commit is contained in:
@@ -4,7 +4,7 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass, replace
|
||||
from enum import Enum
|
||||
from typing import TYPE_CHECKING, Any, ClassVar, Generic, Protocol, TypeVar
|
||||
from typing import TYPE_CHECKING, Any, ClassVar, Generic, Protocol, TypeVar, get_args
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
@@ -144,9 +144,15 @@ class AttentionBackend(ABC):
|
||||
|
||||
@classmethod
|
||||
def supports_block_size(cls, block_size: int | None) -> bool:
|
||||
from vllm.config.cache import BlockSize
|
||||
|
||||
if block_size is None:
|
||||
return True
|
||||
|
||||
valid_sizes = get_args(BlockSize)
|
||||
if block_size not in valid_sizes:
|
||||
return False
|
||||
|
||||
supported_kernel_block_sizes = cls.get_supported_kernel_block_sizes()
|
||||
if not supported_kernel_block_sizes:
|
||||
return True
|
||||
@@ -161,17 +167,6 @@ class AttentionBackend(ABC):
|
||||
return True
|
||||
return False
|
||||
|
||||
@classmethod
def get_preferred_block_size(cls, default_block_size: int = 16) -> int:
    """Pick a block size for this backend, favoring ``default_block_size``.

    Returns ``default_block_size`` when the backend declares no kernel
    block-size constraints, or when the default satisfies them. Otherwise
    falls back to the smallest size the backend's kernels accept (for a
    ``MultipleOf`` spec, its base multiple).
    """
    supported = cls.get_supported_kernel_block_sizes()
    # No declared constraints: any size works, so keep the caller's default.
    if not supported:
        return default_block_size
    if cls.supports_block_size(default_block_size):
        return default_block_size
    # Default is unsupported; choose the smallest concrete size available.
    return min(
        spec.base if isinstance(spec, MultipleOf) else spec
        for spec in supported
    )
|
||||
|
||||
@classmethod
def is_mla(cls) -> bool:
    """Return ``False``: the base backend does not report MLA support.

    Backends that do implement MLA presumably override this to return
    ``True`` — confirm against subclasses elsewhere in the project.
    """
    return False
|
||||
|
||||
Reference in New Issue
Block a user