Move PoolerConfig from config/__init__.py to config/pooler.py (#25181)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
Harry Mellor
2025-09-19 12:02:55 +01:00
committed by GitHub
parent 1dfea5f4a9
commit 058525b997
14 changed files with 193 additions and 179 deletions

View File

@@ -42,7 +42,7 @@ python client.py
### Server Configuration
The key parameters for chunked processing are in the `--override-pooler-config`:
The key parameters for chunked processing are in the `--pooler-config`:
```json
{

View File

@@ -13,7 +13,7 @@ Prerequisites:
# MEAN pooling (processes all chunks, recommended for complete coverage)
vllm serve intfloat/multilingual-e5-large \
--override-pooler-config \
--pooler-config \
'{"pooling_type": "MEAN", "normalize": true, ' \
'"enable_chunked_processing": true, "max_embed_len": 3072000}' \
--served-model-name multilingual-e5-large \
@@ -23,7 +23,7 @@ Prerequisites:
# OR CLS pooling (native CLS within chunks, MEAN aggregation across chunks)
vllm serve BAAI/bge-large-en-v1.5 \
--override-pooler-config \
--pooler-config \
'{"pooling_type": "CLS", "normalize": true, ' \
'"enable_chunked_processing": true, "max_embed_len": 1048576}' \
--served-model-name bge-large-en-v1.5 \

View File

@@ -103,7 +103,7 @@ POOLER_CONFIG="{\"pooling_type\": \"$POOLING_TYPE\", \"normalize\": true, \"enab
vllm serve "$MODEL_NAME" \
--tensor-parallel-size "$GPU_COUNT" \
--enforce-eager \
--override-pooler-config "$POOLER_CONFIG" \
--pooler-config "$POOLER_CONFIG" \
--served-model-name ${MODEL_CODE} \
--api-key "$API_KEY" \
--trust-remote-code \