# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
Orchestration-level integration tests for RayExecutorV2.
"""

import gc
import os
import pathlib

import pytest
import ray

pytestmark = pytest.mark.usefixtures("enable_ray_v2_backend")

MODEL = "facebook/opt-125m"


def _get_env_var(worker, name):
    return os.environ.get(name)


def _ray_init():
    """Start Ray with the project root on workers' PYTHONPATH.

    Without this, workers cannot unpickle actor classes defined in the
    ``tests`` package, causing FunctionActorManager to fall back to
    TemporaryActor which drops async method signatures."""
    project_root = str(pathlib.Path(__file__).resolve().parents[2])
    ray.init(
        ignore_reinit_error=True,
        runtime_env={"env_vars": {"PYTHONPATH": project_root}},
    )


@pytest.fixture
def ray_init():
    _ray_init()


class _AsyncLLMActor:
    def start(self, pg, bundle_indices=None, ray_runtime_env=None):
        os.environ["VLLM_USE_RAY_V2_EXECUTOR_BACKEND"] = "1"
        # Needed so collective_rpc can pickle _get_env_var over the
        # AsyncLLM -> EngineCore ZMQ boundary.
        os.environ["VLLM_ALLOW_INSECURE_SERIALIZATION"] = "1"
        if bundle_indices is not None:
            os.environ["VLLM_RAY_BUNDLE_INDICES"] = bundle_indices
        else:
            os.environ.pop("VLLM_RAY_BUNDLE_INDICES", None)

        from vllm.engine.arg_utils import AsyncEngineArgs
        from vllm.v1.engine.async_llm import AsyncLLM
        from vllm.v1.executor.abstract import Executor

        engine_args = AsyncEngineArgs(
            model=MODEL,
            tensor_parallel_size=2,
            distributed_executor_backend="ray",
            enforce_eager=True,
            max_model_len=256,
            gpu_memory_utilization=0.8,
        )
        vllm_config = engine_args.create_engine_config()
        vllm_config.parallel_config.placement_group = pg
        if ray_runtime_env is not None:
            vllm_config.parallel_config.ray_runtime_env = ray_runtime_env

        executor_class = Executor.get_class(vllm_config)
        self.engine = AsyncLLM(
            vllm_config=vllm_config,
            executor_class=executor_class,
            log_stats=False,
            log_requests=False,
        )

    async def generate(self, prompt):
        from vllm.sampling_params import SamplingParams

        params = SamplingParams(max_tokens=16)
        result = None
        async for output in self.engine.generate(
            prompt, params, request_id="test_request_id"
        ):
            result = output
        assert result is not None
        return result.outputs[0].text

    async def generate_and_get_worker_envs(self, prompt, env_names):
        from vllm.sampling_params import SamplingParams

        params = SamplingParams(max_tokens=16)
        result = None
        async for output in self.engine.generate(
            prompt, params, request_id="test_request_id"
        ):
            result = output
        assert result is not None
        text = result.outputs[0].text

        env_results = {}
        for name in env_names:
            vals = await self.engine.collective_rpc(
                _get_env_var, timeout=10, args=(name,)
            )
            env_results[name] = vals
        return text, env_results

    def shutdown(self):
        if engine := getattr(self, "engine", None):
            engine.shutdown()
            del self.engine
            gc.collect()


AsyncLLMActor = ray.remote(num_cpus=0, max_concurrency=1)(_AsyncLLMActor)


def test_multi_replicas(ray_init):
    pg1 = ray.util.placement_group([{"GPU": 1, "CPU": 1}] * 2, strategy="PACK")
    pg2 = ray.util.placement_group([{"GPU": 1, "CPU": 1}] * 2, strategy="PACK")
    ray.get([pg1.ready(), pg2.ready()])

    actor1 = AsyncLLMActor.remote()
    actor2 = AsyncLLMActor.remote()

    ray.get(actor1.start.remote(pg1))
    ray.get(actor2.start.remote(pg2))

    out1, out2 = ray.get(
        [
            actor1.generate.remote("Hello world"),
            actor2.generate.remote("Hello world"),
        ]
    )
    assert len(out1) > 0
    assert len(out2) > 0


def test_multi_replicas_with_bundle_indices(ray_init):
    pg = ray.util.placement_group([{"GPU": 1, "CPU": 1}] * 4, strategy="PACK")
    ray.get(pg.ready())

    actor1 = AsyncLLMActor.remote()
    actor2 = AsyncLLMActor.remote()

    ray.get(actor1.start.remote(pg, bundle_indices="2,1"))
    ray.get(actor2.start.remote(pg, bundle_indices="0,3"))

    out1, out2 = ray.get(
        [
            actor1.generate.remote("Hello world"),
            actor2.generate.remote("Hello world"),
        ]
    )
    assert len(out1) > 0
    assert len(out2) > 0


def test_env_var_and_runtime_env_propagation():
    """
    Verify env vars (NCCL_, HF_) and parallel_config.ray_runtime_env
    propagate to RayWorkerProc actors.
    """
    sentinel_vars = {
        "NCCL_DEBUG": "INFO",
        "HF_TOKEN": "test_sentinel_token",
    }
    for k, v in sentinel_vars.items():
        os.environ[k] = v

    try:
        # Called directly (not via the ray_init fixture) because sentinel
        # env vars must be in os.environ before ray.init() so that Ray
        # worker processes inherit them.
        _ray_init()

        pg = ray.util.placement_group([{"GPU": 1, "CPU": 1}] * 2, strategy="PACK")
        ray.get(pg.ready())

        # Include the project root so that RayWorkerProc actors can
        # unpickle _get_env_var.
        project_root = str(pathlib.Path(__file__).resolve().parents[2])
        ray_runtime_env = {
            "env_vars": {
                "RAY_RUNTIME_ENV_TEST": "ray_runtime_env",
                "PYTHONPATH": project_root,
            },
        }

        actor = AsyncLLMActor.remote()
        ray.get(actor.start.remote(pg, ray_runtime_env=ray_runtime_env))

        all_env_names = list(sentinel_vars) + ["RAY_RUNTIME_ENV_TEST"]
        text, env_results = ray.get(
            actor.generate_and_get_worker_envs.remote("Hello world", all_env_names)
        )
        assert len(text) > 0

        for name, expected in sentinel_vars.items():
            for val in env_results[name]:
                assert val == expected

        for val in env_results["RAY_RUNTIME_ENV_TEST"]:
            assert val == "ray_runtime_env"

    finally:
        for k in sentinel_vars:
            os.environ.pop(k, None)