Adds method to read the pooling types from model's files (#9506)

Signed-off-by: Flavia Beo <flavia.beo@ibm.com>
Signed-off-by: Max de Bayser <mbayser@br.ibm.com>
Co-authored-by: Max de Bayser <mbayser@br.ibm.com>
This commit is contained in:
Flávia Béo
2024-11-07 05:42:40 -03:00
committed by GitHub
parent e036e527a0
commit aa9078fa03
10 changed files with 342 additions and 25 deletions

View File

@@ -16,6 +16,7 @@ from vllm.config import (CacheConfig, ConfigFormat, DecodingConfig,
VllmConfig)
from vllm.executor.executor_base import ExecutorBase
from vllm.logger import init_logger
from vllm.model_executor.layers.pooler import PoolingType
from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS
from vllm.platforms import current_platform
from vllm.transformers_utils.config import (
@@ -863,7 +864,7 @@ class EngineArgs:
parser.add_argument(
'--pooling-type',
choices=['LAST', 'ALL', 'CLS', 'STEP'],
choices=[pt.name for pt in PoolingType],
default=None,
help='Used to configure the pooling method in the embedding model.'
)