[Model] Pooling models default to using chunked prefill & prefix caching if supported. (#20930)
Signed-off-by: wang.yuqi <noooop@126.com>
This commit is contained in:
@@ -182,8 +182,8 @@ def as_seq_cls_model(cls: _T) -> _T:
 assert pooler_config is not None
 
 pooling_type_str = pooler_config.pooling_type
-pooling_type = (PoolingType.LAST if pooling_type_str is None else
-                PoolingType[pooling_type_str])
+assert pooling_type_str is not None
+pooling_type = PoolingType[pooling_type_str]
 
 self.pooler = DispatchPooler({
 "encode":
Reference in New Issue
Block a user