[CI/Build] Move test_utils.py to tests/utils.py (#4425)

Since #4335 was merged, I've noticed that the definition of ServerRunner in the tests is the same as the one in the test for the OpenAI API. I have moved the class to the test utilities to avoid code duplication. (Although the code only appears twice so far, I will add another similar test suite in #4200, which would duplicate it a third time.) Also, I have moved the test utilities file (test_utils.py) into the tests directory (tests/utils.py), since none of its code is actually used in the main package. Note that I have added __init__.py to each test subpackage and updated the ray.init() call in the test utilities file so that tests/utils.py can be imported with relative imports.
2024-05-13 22:50:09 +08:00
parent 702bee461f
commit 350f9e107f
34 changed files with 138 additions and 164 deletions
--- a/tests/entrypoints/test_openai_server.py
+++ b/tests/entrypoints/test_openai_server.py
@@ -1,10 +1,6 @@
 # imports for guided decoding tests
 import json
-import os
 import re
-import subprocess
-import sys
-import time

 import jsonschema
 import openai  # use the official client for correctness check
@@ -12,7 +8,6 @@ import pytest
 # using Ray for overall ease of process management, parallel requests,
 # and debugging.
 import ray
-import requests
 import torch
 # downloading lora to test lora requests
 from huggingface_hub import snapshot_download
@@ -20,7 +15,8 @@ from openai import BadRequestError

 from vllm.transformers_utils.tokenizer import get_tokenizer

-MAX_SERVER_START_WAIT_S = 600  # wait for server to start for 60 seconds
+from ..utils import ServerRunner
+
 # any model with a chat template should work here
 MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
 EMBEDDING_MODEL_NAME = "intfloat/e5-mistral-7b-instruct"
@@ -78,45 +74,6 @@ TEST_CHOICE = [
 pytestmark = pytest.mark.asyncio


-@ray.remote(num_gpus=1)
-class ServerRunner:
-
-    def __init__(self, args):
-        env = os.environ.copy()
-        env["PYTHONUNBUFFERED"] = "1"
-        self.proc = subprocess.Popen(
-            ["python3", "-m", "vllm.entrypoints.openai.api_server"] + args,
-            env=env,
-            stdout=sys.stdout,
-            stderr=sys.stderr,
-        )
-        self._wait_for_server()
-
-    def ready(self):
-        return True
-
-    def _wait_for_server(self):
-        # run health check
-        start = time.time()
-        while True:
-            try:
-                if requests.get(
-                        "http://localhost:8000/health").status_code == 200:
-                    break
-            except Exception as err:
-                if self.proc.poll() is not None:
-                    raise RuntimeError("Server exited unexpectedly.") from err
-
-                time.sleep(0.5)
-                if time.time() - start > MAX_SERVER_START_WAIT_S:
-                    raise RuntimeError(
-                        "Server failed to start in time.") from err
-
-    def __del__(self):
-        if hasattr(self, "proc"):
-            self.proc.terminate()
-
-
@pytest.fixture(scope="session")
 def zephyr_lora_files():
    return snapshot_download(repo_id=LORA_NAME)