[ci] try to add multi-node tests (#6280)

Signed-off-by: Muralidhar Andoorveedu <muralidhar.andoorveedu@centml.ai>
Co-authored-by: Muralidhar Andoorveedu <muralidhar.andoorveedu@centml.ai>
2024-07-12 21:51:48 -07:00
parent d80aef3776
commit 41708e5034
13 changed files with 229 additions and 274 deletions
--- a/tests/tensorizer_loader/test_tensorizer.py
+++ b/tests/tensorizer_loader/test_tensorizer.py
@@ -6,7 +6,6 @@ from unittest.mock import MagicMock, patch

 import openai
 import pytest
-import ray
 import torch
 from tensorizer import EncryptionParams

@@ -22,7 +21,7 @@ from vllm.model_executor.model_loader.tensorizer import (TensorizerConfig,
                                                         tensorize_vllm_model)

 from ..conftest import VllmRunner, cleanup
-from ..utils import VLLM_PATH, RemoteOpenAIServer
+from ..utils import RemoteOpenAIServer

 # yapf conflicts with isort for this docstring

@@ -220,23 +219,21 @@ def test_openai_apiserver_with_tensorizer(vllm_runner, tmp_path):
        json.dumps(model_loader_extra_config),
    ]

-    ray.init(runtime_env={"working_dir": VLLM_PATH})
+    with RemoteOpenAIServer(openai_args) as server:
+        print("Server ready.")

-    server = RemoteOpenAIServer(openai_args)
-    print("Server ready.")
+        client = server.get_client()
+        completion = client.completions.create(model=model_ref,
+                                            prompt="Hello, my name is",
+                                            max_tokens=5,
+                                            temperature=0.0)

-    client = server.get_client()
-    completion = client.completions.create(model=model_ref,
-                                           prompt="Hello, my name is",
-                                           max_tokens=5,
-                                           temperature=0.0)
-
-    assert completion.id is not None
-    assert len(completion.choices) == 1
-    assert len(completion.choices[0].text) >= 5
-    assert completion.choices[0].finish_reason == "length"
-    assert completion.usage == openai.types.CompletionUsage(
-        completion_tokens=5, prompt_tokens=6, total_tokens=11)
+        assert completion.id is not None
+        assert len(completion.choices) == 1
+        assert len(completion.choices[0].text) >= 5
+        assert completion.choices[0].finish_reason == "length"
+        assert completion.usage == openai.types.CompletionUsage(
+            completion_tokens=5, prompt_tokens=6, total_tokens=11)


 def test_raise_value_error_on_invalid_load_format(vllm_runner):
@@ -282,7 +279,6 @@ def test_deserialized_encrypted_vllm_model_with_tp_has_same_outputs(vllm_runner,
    base_model.model.llm_engine.model_executor.shutdown()
    del base_model
    cleanup()
-    ray.shutdown()

    # load model with two shards and serialize with encryption
    model_path = str(tmp_path / (model_ref + "-%02d.tensors"))
@@ -305,7 +301,6 @@ def test_deserialized_encrypted_vllm_model_with_tp_has_same_outputs(vllm_runner,
    assert os.path.isfile(model_path % 0), "Serialization subprocess failed"
    assert os.path.isfile(model_path % 1), "Serialization subprocess failed"
    cleanup()
-    ray.shutdown()

    loaded_vllm_model = vllm_runner(
        model_ref,