[Bugfix]: serialize config by value for --trust-remote-code (#6751)

Signed-off-by: Travis Johnson <tsjohnso@us.ibm.com> Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
2024-10-21 20:46:24 -06:00
parent 76a5e13270
commit b729901139
4 changed files with 103 additions and 28 deletions
--- a/tests/distributed/test_pipeline_parallel.py
+++ b/tests/distributed/test_pipeline_parallel.py
@@ -28,19 +28,25 @@ class ParallelSetup(NamedTuple):
    chunked_prefill: bool


+class PPTestOptions(NamedTuple):
+    multi_node_only: bool
+    trust_remote_code: bool
+    tokenizer_mode: Optional[str]
+
+
@dataclass
 class PPTestSettings:
    parallel_setups: List[ParallelSetup]
    distributed_backends: List[str]
    task: TaskOption
-    trust_remote_code: bool
-    tokenizer_mode: Optional[str]
+    test_options: PPTestOptions

    @staticmethod
    def detailed(
        *,
        tp_base: int = 1,
        pp_base: int = 2,
+        multi_node_only: bool = False,
        task: TaskOption = "auto",
        trust_remote_code: bool = False,
        tokenizer_mode: Optional[str] = None,
@@ -70,8 +76,9 @@ class PPTestSettings:
            ],
            distributed_backends=["mp", "ray"],
            task=task,
-            trust_remote_code=trust_remote_code,
-            tokenizer_mode=tokenizer_mode,
+            test_options=PPTestOptions(multi_node_only=multi_node_only,
+                                       trust_remote_code=trust_remote_code,
+                                       tokenizer_mode=tokenizer_mode),
        )

    @staticmethod
@@ -80,6 +87,7 @@ class PPTestSettings:
        tp_base: int = 1,
        pp_base: int = 2,
        task: TaskOption = "auto",
+        multi_node_only: bool = False,
        trust_remote_code: bool = False,
        tokenizer_mode: Optional[str] = None,
    ):
@@ -92,15 +100,18 @@ class PPTestSettings:
            ],
            distributed_backends=["mp"],
            task=task,
-            trust_remote_code=trust_remote_code,
-            tokenizer_mode=tokenizer_mode,
+            test_options=PPTestOptions(multi_node_only=multi_node_only,
+                                       trust_remote_code=trust_remote_code,
+                                       tokenizer_mode=tokenizer_mode),
        )

    def iter_params(self, model_name: str):
+        opts = self.test_options
+
        for parallel_setup in self.parallel_setups:
            for distributed_backend in self.distributed_backends:
                yield (model_name, parallel_setup, distributed_backend,
-                       self.task, self.trust_remote_code, self.tokenizer_mode)
+                       self.task, opts)


 # NOTE: You can adjust tp_base and/or pp_base locally to fit the model in GPU
@@ -110,6 +121,7 @@ class PPTestSettings:
 GENERATION_MODEL_SETTINGS = {
    # [DETAILED TESTS]
    "meta-llama/Meta-Llama-3-8B": PPTestSettings.detailed(),
+    "microsoft/Phi-3-mini-4k-instruct": PPTestSettings.detailed(trust_remote_code=True, multi_node_only=True),  # noqa: E501
    # [FAST TESTS]
    # Uses Llama
    # "BAAI/AquilaChat-7B": PPTestSettings.fast(),
@@ -151,10 +163,8 @@ GENERATION_MODEL_SETTINGS = {
    "facebook/opt-iml-max-1.3b": PPTestSettings.fast(),
    "OrionStarAI/Orion-14B-Chat": PPTestSettings.fast(trust_remote_code=True),
    "microsoft/phi-2": PPTestSettings.fast(),
-    "microsoft/Phi-3-mini-4k-instruct": PPTestSettings.fast(),
    "microsoft/Phi-3-small-8k-instruct": PPTestSettings.fast(trust_remote_code=True),  # noqa: E501
-    # FIXME: https://github.com/vllm-project/vllm/issues/8553
-    # "microsoft/Phi-3.5-MoE-instruct": PPTestSettings.fast(trust_remote_code=True),  # noqa: E501
+    "microsoft/Phi-3.5-MoE-instruct": PPTestSettings.fast(trust_remote_code=True),  # noqa: E501
    "adept/persimmon-8b-chat": PPTestSettings.fast(),
    "Qwen/Qwen-7B-Chat": PPTestSettings.fast(trust_remote_code=True),
    "Qwen/Qwen2-beta-7B-Chat": PPTestSettings.fast(),
@@ -205,6 +215,7 @@ TEST_MODELS = [
    # [LANGUAGE GENERATION]
    "meta-llama/Meta-Llama-3-8B",
    "ibm/PowerLM-3b",
+    "microsoft/Phi-3-mini-4k-instruct",
    # [LANGUAGE EMBEDDING]
    "intfloat/e5-mistral-7b-instruct",
    "BAAI/bge-multilingual-gemma2",
@@ -220,19 +231,21 @@ def _compare_tp(
    parallel_setup: ParallelSetup,
    distributed_backend: str,
    task: TaskOption,
-    trust_remote_code: bool,
-    tokenizer_mode: Optional[str],
+    test_options: PPTestOptions,
    num_gpus_available: int,
    *,
-    method: Literal["generate", "encode"] = "encode",
+    method: Literal["generate", "encode"],
 ):
    tp_size, pp_size, eager_mode, chunked_prefill = parallel_setup
+    multi_node_only, trust_remote_code, tokenizer_mode = test_options

    if num_gpus_available < tp_size * pp_size:
        pytest.skip(f"Need at least {tp_size} x {pp_size} GPUs")
    if VLLM_MULTI_NODE and distributed_backend == "mp":
        pytest.skip("Skipping multi-node pipeline parallel test for "
                    "multiprocessing distributed backend")
+    if multi_node_only and not VLLM_MULTI_NODE:
+        pytest.skip("Not in multi-node setting")

    common_args = [
        # use half precision for speed and memory savings in CI environment
@@ -307,7 +320,7 @@ def _compare_tp(

@pytest.mark.parametrize(
    ("model_name", "parallel_setup", "distributed_backend", "task",
-     "trust_remote_code", "tokenizer_mode"),
+     "test_options"),
    [
        params for model_name, settings in GENERATION_MODEL_SETTINGS.items()
        for params in settings.iter_params(model_name)
@@ -320,23 +333,21 @@ def test_tp_language_generation(
    parallel_setup: ParallelSetup,
    distributed_backend: str,
    task: TaskOption,
-    trust_remote_code: bool,
-    tokenizer_mode: Optional[str],
+    test_options: PPTestOptions,
    num_gpus_available,
 ):
    _compare_tp(model_name,
                parallel_setup,
                distributed_backend,
                task,
-                trust_remote_code,
-                tokenizer_mode,
+                test_options,
                num_gpus_available,
                method="generate")


@pytest.mark.parametrize(
    ("model_name", "parallel_setup", "distributed_backend", "task",
-     "trust_remote_code", "tokenizer_mode"),
+     "test_options"),
    [
        params for model_name, settings in EMBEDDING_MODEL_SETTINGS.items()
        for params in settings.iter_params(model_name)
@@ -349,23 +360,21 @@ def test_tp_language_embedding(
    parallel_setup: ParallelSetup,
    distributed_backend: str,
    task: TaskOption,
-    trust_remote_code: bool,
-    tokenizer_mode: Optional[str],
+    test_options: PPTestOptions,
    num_gpus_available,
 ):
    _compare_tp(model_name,
                parallel_setup,
                distributed_backend,
                task,
-                trust_remote_code,
-                tokenizer_mode,
+                test_options,
                num_gpus_available,
                method="encode")


@pytest.mark.parametrize(
    ("model_name", "parallel_setup", "distributed_backend", "task",
-     "trust_remote_code", "tokenizer_mode"),
+     "test_options"),
    [
        params for model_name, settings in MULTIMODAL_MODEL_SETTINGS.items()
        for params in settings.iter_params(model_name)
@@ -378,15 +387,13 @@ def test_tp_multimodal_generation(
    parallel_setup: ParallelSetup,
    distributed_backend: str,
    task: TaskOption,
-    trust_remote_code: bool,
-    tokenizer_mode: Optional[str],
+    test_options: PPTestOptions,
    num_gpus_available,
 ):
    _compare_tp(model_name,
                parallel_setup,
                distributed_backend,
                task,
-                trust_remote_code,
-                tokenizer_mode,
+                test_options,
                num_gpus_available,
                method="generate")