Adds method to read the pooling types from model's files (#9506)
Signed-off-by: Flavia Beo <flavia.beo@ibm.com>
Signed-off-by: Max de Bayser <mbayser@br.ibm.com>
Co-authored-by: Max de Bayser <mbayser@br.ibm.com>
This commit is contained in:
@@ -16,6 +16,7 @@ from vllm.config import (CacheConfig, ConfigFormat, DecodingConfig,
|
||||
VllmConfig)
|
||||
from vllm.executor.executor_base import ExecutorBase
|
||||
from vllm.logger import init_logger
|
||||
+from vllm.model_executor.layers.pooler import PoolingType
|
||||
from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS
|
||||
from vllm.platforms import current_platform
|
||||
from vllm.transformers_utils.config import (
|
||||
@@ -863,7 +864,7 @@ class EngineArgs:
|
||||
|
||||
parser.add_argument(
|
||||
'--pooling-type',
|
||||
-choices=['LAST', 'ALL', 'CLS', 'STEP'],
|
||||
+choices=[pt.name for pt in PoolingType],
|
||||
default=None,
|
||||
help='Used to configure the pooling method in the embedding model.'
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user