[Model] Pooling models default to using chunked prefill & prefix caching if supported. (#20930)
Signed-off-by: wang.yuqi <noooop@126.com>
This commit is contained in:
@@ -345,16 +345,34 @@ class EmbedModelInfo(NamedTuple):
|
||||
matryoshka_dimensions: Optional[list[int]] = None
|
||||
architecture: str = ""
|
||||
dtype: str = "auto"
|
||||
default_pooling_type: str = ""
|
||||
enable_test: bool = True
|
||||
|
||||
|
||||
class CLSPoolingEmbedModelInfo(EmbedModelInfo):
|
||||
default_pooling_type: str = "CLS"
|
||||
|
||||
|
||||
class LASTPoolingEmbedModelInfo(EmbedModelInfo):
|
||||
default_pooling_type: str = "LAST"
|
||||
|
||||
|
||||
class RerankModelInfo(NamedTuple):
|
||||
name: str
|
||||
architecture: str = ""
|
||||
dtype: str = "auto"
|
||||
default_pooling_type: str = ""
|
||||
enable_test: bool = True
|
||||
|
||||
|
||||
class CLSPoolingRerankModelInfo(RerankModelInfo):
|
||||
default_pooling_type: str = "CLS"
|
||||
|
||||
|
||||
class LASTPoolingRerankModelInfo(RerankModelInfo):
|
||||
default_pooling_type: str = "LAST"
|
||||
|
||||
|
||||
def dummy_hf_overrides(
|
||||
hf_config: PretrainedConfig,
|
||||
*,
|
||||
|
||||
Reference in New Issue
Block a user