[Bugfix] Fix tensor parallel for qwen2 classification model (#10297)

Signed-off-by: Isotr0py <2037008807@qq.com>
2024-11-14 10:54:59 +08:00
parent ac49b59d8b
commit 15bb8330aa
2 changed files with 9 additions and 4 deletions
--- a/tests/models/embedding/language/test_cls_models.py
+++ b/tests/models/embedding/language/test_cls_models.py
@@ -21,14 +21,14 @@ def test_classification_models(
    model: str,
    dtype: str,
 ) -> None:
+    with vllm_runner(model, dtype=dtype) as vllm_model:
+        vllm_outputs = vllm_model.classify(example_prompts)
+
    with hf_runner(model,
                   dtype=dtype,
                   auto_cls=AutoModelForSequenceClassification) as hf_model:
        hf_outputs = hf_model.classify(example_prompts)

-    with vllm_runner(model, dtype=dtype) as vllm_model:
-        vllm_outputs = vllm_model.classify(example_prompts)
-
    print(hf_outputs, vllm_outputs)

    # check logits difference