Support embedding models in V1 (#16188)
Signed-off-by: Max de Bayser <mbayser@br.ibm.com> Signed-off-by: Max de Bayser <maxdebayser@gmail.com> Signed-off-by: 22quinn <33176974+22quinn@users.noreply.github.com> Co-authored-by: 22quinn <33176974+22quinn@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
4959915089
commit
799397ee4f
@@ -31,7 +31,7 @@ class TestSetting:
|
||||
# basic llama model
|
||||
TestSetting(
|
||||
model="meta-llama/Llama-3.2-1B-Instruct",
|
||||
model_args=[],
|
||||
model_args=["--max-model-len", "2048"],
|
||||
pp_size=2,
|
||||
tp_size=2,
|
||||
attn_backend="FLASHINFER",
|
||||
@@ -41,7 +41,7 @@ class TestSetting:
|
||||
# llama model with quantization
|
||||
TestSetting(
|
||||
model="TheBloke/TinyLlama-1.1B-Chat-v0.3-GPTQ",
|
||||
model_args=["--quantization", "gptq"],
|
||||
model_args=["--quantization", "gptq", "--max-model-len", "2048"],
|
||||
pp_size=1,
|
||||
tp_size=1,
|
||||
attn_backend="FLASH_ATTN",
|
||||
@@ -51,7 +51,7 @@ class TestSetting:
|
||||
# MoE model
|
||||
TestSetting(
|
||||
model="ibm/PowerMoE-3b",
|
||||
model_args=[],
|
||||
model_args=["--max-model-len", "2048"],
|
||||
pp_size=1,
|
||||
tp_size=2,
|
||||
attn_backend="FLASH_ATTN",
|
||||
@@ -61,23 +61,27 @@ class TestSetting:
|
||||
# embedding model
|
||||
TestSetting(
|
||||
model="BAAI/bge-multilingual-gemma2",
|
||||
model_args=["--task", "embed", "--dtype", "bfloat16"],
|
||||
model_args=[
|
||||
"--task", "embed", "--dtype", "bfloat16", "--max-model-len",
|
||||
"2048"
|
||||
],
|
||||
pp_size=1,
|
||||
tp_size=1,
|
||||
attn_backend="FLASH_ATTN",
|
||||
method="encode",
|
||||
fullgraph=True,
|
||||
),
|
||||
# encoder-based embedding model (BERT)
|
||||
TestSetting(
|
||||
model="BAAI/bge-base-en-v1.5",
|
||||
model_args=["--task", "embed"],
|
||||
pp_size=1,
|
||||
tp_size=1,
|
||||
attn_backend="XFORMERS",
|
||||
method="encode",
|
||||
fullgraph=True,
|
||||
),
|
||||
# TODO: bert models are not supported in V1 yet
|
||||
# # encoder-based embedding model (BERT)
|
||||
# TestSetting(
|
||||
# model="BAAI/bge-base-en-v1.5",
|
||||
# model_args=["--task", "embed"],
|
||||
# pp_size=1,
|
||||
# tp_size=1,
|
||||
# attn_backend="XFORMERS",
|
||||
# method="encode",
|
||||
# fullgraph=True,
|
||||
# ),
|
||||
# vision language model
|
||||
TestSetting(
|
||||
model="microsoft/Phi-3.5-vision-instruct",
|
||||
|
||||
Reference in New Issue
Block a user