[Hybrid Allocator] Support Pipeline Parallel (#23974)

Signed-off-by: Chen Zhang <zhangch99@outlook.com>
This commit is contained in:
Chen Zhang
2025-09-14 15:55:17 -07:00
committed by GitHub
parent 90f3f7d73e
commit 8e5cdcda4e
7 changed files with 472 additions and 235 deletions

View File

@@ -215,9 +215,7 @@ TEXT_GENERATION_MODELS = {
EMBEDDING_MODELS = { # type: ignore[var-annotated]
# [Text-only]
"intfloat/e5-mistral-7b-instruct": PPTestSettings.fast(runner="pooling"),
-# TODO: re-enable when https://github.com/vllm-project/vllm/issues/23883
-# is fixed
-#"BAAI/bge-multilingual-gemma2": PPTestSettings.fast(runner="pooling"),
+"BAAI/bge-multilingual-gemma2": PPTestSettings.fast(runner="pooling"),
"Qwen/Qwen2.5-Math-RM-72B": PPTestSettings.fast(
load_format="dummy", runner="pooling"
),