[Bugfix] Fix tensor parallel for qwen2 classification model (#10297)

Signed-off-by: Isotr0py <2037008807@qq.com>
This commit is contained in:
Isotr0py
2024-11-14 10:54:59 +08:00
committed by GitHub
parent ac49b59d8b
commit 15bb8330aa
2 changed files with 9 additions and 4 deletions

View File

@@ -21,14 +21,14 @@ def test_classification_models(
model: str,
dtype: str,
) -> None:
with vllm_runner(model, dtype=dtype) as vllm_model:
vllm_outputs = vllm_model.classify(example_prompts)
with hf_runner(model,
dtype=dtype,
auto_cls=AutoModelForSequenceClassification) as hf_model:
hf_outputs = hf_model.classify(example_prompts)
with vllm_runner(model, dtype=dtype) as vllm_model:
vllm_outputs = vllm_model.classify(example_prompts)
print(hf_outputs, vllm_outputs)
# check logits difference