[Refactor] Separate sequence and token pooling types (#32026)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2026-01-10 12:53:24 +08:00
committed by GitHub
parent 52d428295d
commit 583a90e005
42 changed files with 324 additions and 204 deletions

View File

@@ -35,10 +35,11 @@ from vllm.utils.hashing import safe_hash
if TYPE_CHECKING:
from vllm.config.model import AttnTypeStr
-from vllm.config.pooler import PoolingTypeStr
+from vllm.config.pooler import SequencePoolingType, TokenPoolingType
else:
AttnTypeStr = Any
-PoolingTypeStr = Any
+SequencePoolingType = Any
+TokenPoolingType = Any
from .interfaces import (
@@ -57,7 +58,8 @@ from .interfaces import (
)
from .interfaces_base import (
get_attn_type,
-get_default_pooling_type,
+get_default_seq_pooling_type,
+get_default_tok_pooling_type,
is_pooling_model,
is_text_generation_model,
)
@@ -548,7 +550,8 @@ class _ModelInfo:
is_text_generation_model: bool
is_pooling_model: bool
attn_type: AttnTypeStr
-default_pooling_type: PoolingTypeStr
+default_seq_pooling_type: SequencePoolingType
+default_tok_pooling_type: TokenPoolingType
supports_cross_encoding: bool
supports_multimodal: bool
supports_multimodal_raw_input_only: bool
@@ -569,7 +572,8 @@ class _ModelInfo:
architecture=model.__name__,
is_text_generation_model=is_text_generation_model(model),
is_pooling_model=is_pooling_model(model),
-default_pooling_type=get_default_pooling_type(model),
+default_seq_pooling_type=get_default_seq_pooling_type(model),
+default_tok_pooling_type=get_default_tok_pooling_type(model),
attn_type=get_attn_type(model),
supports_cross_encoding=supports_cross_encoding(model),
supports_multimodal=supports_multimodal(model),