[Refactor] Separate sequence and token pooling types (#32026)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2026-01-10 12:53:24 +08:00
committed by GitHub
parent 52d428295d
commit 583a90e005
42 changed files with 324 additions and 204 deletions

View File

@@ -11,6 +11,7 @@ import torch.nn.functional as F
from transformers import PretrainedConfig
from vllm.config.model import AttnTypeStr, ModelConfig, ModelDType, RunnerOption
+from vllm.config.pooler import SequencePoolingType, TokenPoolingType
from vllm.logprobs import Logprob, PromptLogprobs, SampleLogprobs
from vllm.multimodal.processing import InputProcessingContext
from vllm.tokenizers import cached_tokenizer_from_config
@@ -379,7 +380,8 @@ class ModelInfo:
max_model_len: int | None = None
hf_dtype: str = "float32"
hf_overrides: dict[str, Any] | None = None
-pooling_type: str | None = None
+seq_pooling_type: SequencePoolingType | None = None
+tok_pooling_type: TokenPoolingType | None = None
attn_type: AttnTypeStr | None = None
is_prefix_caching_supported: bool | None = None
is_chunked_prefill_supported: bool | None = None