[Refactor] Separate sequence and token pooling types (#32026)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
2026-01-10 12:53:24 +08:00
parent 52d428295d
commit 583a90e005
42 changed files with 324 additions and 204 deletions
--- a/tests/models/utils.py
+++ b/tests/models/utils.py
@@ -11,6 +11,7 @@ import torch.nn.functional as F
 from transformers import PretrainedConfig

 from vllm.config.model import AttnTypeStr, ModelConfig, ModelDType, RunnerOption
+from vllm.config.pooler import SequencePoolingType, TokenPoolingType
 from vllm.logprobs import Logprob, PromptLogprobs, SampleLogprobs
 from vllm.multimodal.processing import InputProcessingContext
 from vllm.tokenizers import cached_tokenizer_from_config
@@ -379,7 +380,8 @@ class ModelInfo:
    max_model_len: int | None = None
    hf_dtype: str = "float32"
    hf_overrides: dict[str, Any] | None = None
-    pooling_type: str | None = None
+    seq_pooling_type: SequencePoolingType | None = None
+    tok_pooling_type: TokenPoolingType | None = None
    attn_type: AttnTypeStr | None = None
    is_prefix_caching_supported: bool | None = None
    is_chunked_prefill_supported: bool | None = None