tests/entrypoints/openai/test_launch_render.py

# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""E2E tests for render endpoints via `vllm launch` (GPU-less serving)."""

import httpx
import pytest
import pytest_asyncio

from ...utils import RemoteLaunchRenderServer

MODEL_NAME = "hmellor/tiny-random-LlamaForCausalLM"


@pytest.fixture(scope="module")
def server():
    """Start one render server for the whole module and yield its handle.

    The server is launched for ``MODEL_NAME`` with no extra CLI arguments;
    the context manager is exited once the module's tests have finished.
    """
    extra_cli_args: list[str] = []
    with RemoteLaunchRenderServer(
        MODEL_NAME,
        extra_cli_args,
        max_wait_seconds=120,
    ) as running_server:
        yield running_server


@pytest_asyncio.fixture
async def client(server):
    """Yield an async HTTP client whose base URL points at the test server."""
    root_url = server.url_for("")
    async with httpx.AsyncClient(base_url=root_url, timeout=30.0) as session:
        yield session


# -- Chat Completion Render --


@pytest.mark.asyncio
async def test_chat_render_basic(client):
    """Single-turn chat render yields a dict with non-empty int token ids."""
    payload = {
        "model": MODEL_NAME,
        "messages": [{"role": "user", "content": "Hello, how are you?"}],
    }
    resp = await client.post("/v1/chat/completions/render", json=payload)
    assert resp.status_code == 200

    rendered = resp.json()

    # Response should be a GenerateRequest dict
    assert isinstance(rendered, dict)
    assert "token_ids" in rendered

    token_ids = rendered["token_ids"]
    assert isinstance(token_ids, list)
    assert len(token_ids) > 0
    assert all(isinstance(tok, int) for tok in token_ids)


@pytest.mark.asyncio
async def test_chat_render_multi_turn(client):
    """Multi-turn chat render yields a dict with non-empty int token ids.

    The conversation alternates user / assistant / user so the request
    carries more than one message.
    """
    response = await client.post(
        "/v1/chat/completions/render",
        json={
            "model": MODEL_NAME,
            "messages": [
                {"role": "user", "content": "Hello"},
                {"role": "assistant", "content": "Hi there!"},
                {"role": "user", "content": "How are you?"},
            ],
        },
    )

    assert response.status_code == 200
    data = response.json()

    assert isinstance(data, dict)
    assert "token_ids" in data
    assert isinstance(data["token_ids"], list)
    assert len(data["token_ids"]) > 0
    # Same element-type check as the single-turn test, so both chat render
    # tests validate the response to the same depth.
    assert all(isinstance(t, int) for t in data["token_ids"])


@pytest.mark.asyncio
async def test_chat_render_invalid_model(client):
    """Chat render with an unknown model name returns 404 and an error body."""
    payload = {
        "model": "nonexistent-model",
        "messages": [{"role": "user", "content": "Hello"}],
    }
    resp = await client.post("/v1/chat/completions/render", json=payload)

    assert resp.status_code == 404
    body = resp.json()
    assert "error" in body


# -- Completion Render --


@pytest.mark.asyncio
async def test_completion_render_basic(client):
    """Completion render of one string prompt returns a non-empty list.

    The first entry must expose the expected keys, non-empty token ids and
    a request id prefixed with ``cmpl-``.
    """
    payload = {
        "model": MODEL_NAME,
        "prompt": "Once upon a time",
    }
    resp = await client.post("/v1/completions/render", json=payload)
    assert resp.status_code == 200

    rendered = resp.json()
    assert isinstance(rendered, list)
    assert len(rendered) > 0

    head = rendered[0]
    for required_key in ("token_ids", "sampling_params", "model", "request_id"):
        assert required_key in head

    token_ids = head["token_ids"]
    assert isinstance(token_ids, list)
    assert len(token_ids) > 0
    assert head["request_id"].startswith("cmpl-")


@pytest.mark.asyncio
async def test_completion_render_multiple_prompts(client):
    """Completion render of two prompts returns exactly two rendered entries.

    Each entry must expose the expected keys, a non-empty list of token ids
    and a request id prefixed with ``cmpl-``.
    """
    response = await client.post(
        "/v1/completions/render",
        json={
            "model": MODEL_NAME,
            "prompt": ["Hello world", "Goodbye world"],
        },
    )

    assert response.status_code == 200
    data = response.json()

    assert isinstance(data, list)
    assert len(data) == 2

    for prompt in data:
        assert "token_ids" in prompt
        assert "sampling_params" in prompt
        assert "model" in prompt
        assert "request_id" in prompt
        # Same container-type check as the single-prompt test, so a
        # non-list value cannot slip through the length check below.
        assert isinstance(prompt["token_ids"], list)
        assert len(prompt["token_ids"]) > 0
        assert prompt["request_id"].startswith("cmpl-")


@pytest.mark.asyncio
async def test_completion_render_invalid_model(client):
    """Completion render with an unknown model returns 404 and an error body."""
    payload = {
        "model": "nonexistent-model",
        "prompt": "Hello",
    }
    resp = await client.post("/v1/completions/render", json=payload)

    assert resp.status_code == 404
    body = resp.json()
    assert "error" in body


@pytest.mark.asyncio
async def test_render_is_fast(client):
    """Render should complete quickly since there is no inference.

    Measures the wall-clock time of a single completion render request and
    requires it to stay under two seconds.
    """
    import time

    start = time.perf_counter()
    response = await client.post(
        "/v1/completions/render",
        json={
            "model": MODEL_NAME,
            "prompt": "Tell me a very long story about " * 10,
        },
    )
    elapsed = time.perf_counter() - start

    assert response.status_code == 200
    # Report the measured latency so a failure on a slow machine can be
    # diagnosed from the test output alone.
    assert elapsed < 2.0, f"render request took {elapsed:.3f}s (limit 2.0s)"


# -- Health & Models --


@pytest.mark.asyncio
async def test_health_endpoint(client):
    """The health endpoint answers a GET with HTTP 200."""
    health = await client.get("/health")
    assert health.status_code == 200


@pytest.mark.asyncio
async def test_models_endpoint(client):
    """The models listing responds with 200 and includes ``MODEL_NAME``."""
    listing = await client.get("/v1/models")
    assert listing.status_code == 200

    body = listing.json()
    assert "data" in body

    served_ids = [entry["id"] for entry in body["data"]]
    assert MODEL_NAME in served_ids
[Frontend] Add render endpoints for prompt preprocessing (#32473) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> Signed-off-by: Hyunkyun Moon <mhg5303@gmail.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 2026-01-19 13:21:46 +09:00			`# SPDX-License-Identifier: Apache-2.0`
			`# SPDX-FileCopyrightText: Copyright contributors to the vLLM project`
[Frontend] Add vllm launch command for GPU-less preprocessing serving (#34551) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> 2026-03-05 03:41:52 +09:00			"""E2E tests for render endpoints via `vllm launch` (GPU-less serving)."""
[Frontend] Add render endpoints for prompt preprocessing (#32473) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> Signed-off-by: Hyunkyun Moon <mhg5303@gmail.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 2026-01-19 13:21:46 +09:00
			`import httpx`
			`import pytest`
			`import pytest_asyncio`

[Frontend] Add vllm launch command for GPU-less preprocessing serving (#34551) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> 2026-03-05 03:41:52 +09:00			`from ...utils import RemoteLaunchRenderServer`
[Frontend] Add render endpoints for prompt preprocessing (#32473) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> Signed-off-by: Hyunkyun Moon <mhg5303@gmail.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 2026-01-19 13:21:46 +09:00
			`MODEL_NAME = "hmellor/tiny-random-LlamaForCausalLM"`


			`@pytest.fixture(scope="module")`
			`def server():`
			`args: list[str] = []`
[Frontend] Add vllm launch command for GPU-less preprocessing serving (#34551) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> 2026-03-05 03:41:52 +09:00			`with RemoteLaunchRenderServer(MODEL_NAME, args, max_wait_seconds=120) as srv:`
			`yield srv`
[Frontend] Add render endpoints for prompt preprocessing (#32473) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> Signed-off-by: Hyunkyun Moon <mhg5303@gmail.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 2026-01-19 13:21:46 +09:00

			`@pytest_asyncio.fixture`
			`async def client(server):`
			`async with httpx.AsyncClient(`
			`base_url=server.url_for(""), timeout=30.0`
			`) as http_client:`
			`yield http_client`


[Frontend] Add vllm launch command for GPU-less preprocessing serving (#34551) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> 2026-03-05 03:41:52 +09:00			`# -- Chat Completion Render --`


[Frontend] Add render endpoints for prompt preprocessing (#32473) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> Signed-off-by: Hyunkyun Moon <mhg5303@gmail.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 2026-01-19 13:21:46 +09:00			`@pytest.mark.asyncio`
[Frontend] Add vllm launch command for GPU-less preprocessing serving (#34551) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> 2026-03-05 03:41:52 +09:00			`async def test_chat_render_basic(client):`
[Frontend] Add render endpoints for prompt preprocessing (#32473) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> Signed-off-by: Hyunkyun Moon <mhg5303@gmail.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 2026-01-19 13:21:46 +09:00			`response = await client.post(`
[Frontend] Add vllm launch command for GPU-less preprocessing serving (#34551) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> 2026-03-05 03:41:52 +09:00			`"/v1/chat/completions/render",`
[Frontend] Add render endpoints for prompt preprocessing (#32473) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> Signed-off-by: Hyunkyun Moon <mhg5303@gmail.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 2026-01-19 13:21:46 +09:00			`json={`
			`"model": MODEL_NAME,`
[Frontend] Add vllm launch command for GPU-less preprocessing serving (#34551) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> 2026-03-05 03:41:52 +09:00			`"messages": [{"role": "user", "content": "Hello, how are you?"}],`
[Frontend] Add render endpoints for prompt preprocessing (#32473) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> Signed-off-by: Hyunkyun Moon <mhg5303@gmail.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 2026-01-19 13:21:46 +09:00			`},`
			`)`

			`assert response.status_code == 200`
			`data = response.json()`

[Bug] Fix Failure in /v1/chat/completions/render for Multimodal Requests (https://github.com/vllm-project/vllm/issues/35665) (#35684) 2026-03-14 17:10:11 +03:00			`# Response should be a GenerateRequest dict`
			`assert isinstance(data, dict)`
			`assert "token_ids" in data`
			`assert isinstance(data["token_ids"], list)`
			`assert len(data["token_ids"]) > 0`
			`assert all(isinstance(t, int) for t in data["token_ids"])`
[Frontend] Add render endpoints for prompt preprocessing (#32473) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> Signed-off-by: Hyunkyun Moon <mhg5303@gmail.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 2026-01-19 13:21:46 +09:00

			`@pytest.mark.asyncio`
[Frontend] Add vllm launch command for GPU-less preprocessing serving (#34551) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> 2026-03-05 03:41:52 +09:00			`async def test_chat_render_multi_turn(client):`
[Frontend] Add render endpoints for prompt preprocessing (#32473) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> Signed-off-by: Hyunkyun Moon <mhg5303@gmail.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 2026-01-19 13:21:46 +09:00			`response = await client.post(`
			`"/v1/chat/completions/render",`
			`json={`
			`"model": MODEL_NAME,`
			`"messages": [`
[Frontend] Add vllm launch command for GPU-less preprocessing serving (#34551) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> 2026-03-05 03:41:52 +09:00			`{"role": "user", "content": "Hello"},`
			`{"role": "assistant", "content": "Hi there!"},`
			`{"role": "user", "content": "How are you?"},`
[Frontend] Add render endpoints for prompt preprocessing (#32473) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> Signed-off-by: Hyunkyun Moon <mhg5303@gmail.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 2026-01-19 13:21:46 +09:00			`],`
			`},`
			`)`

			`assert response.status_code == 200`
[Bug] Fix Failure in /v1/chat/completions/render for Multimodal Requests (https://github.com/vllm-project/vllm/issues/35665) (#35684) 2026-03-14 17:10:11 +03:00			`data = response.json()`
[Frontend] Add render endpoints for prompt preprocessing (#32473) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> Signed-off-by: Hyunkyun Moon <mhg5303@gmail.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 2026-01-19 13:21:46 +09:00
[Bug] Fix Failure in /v1/chat/completions/render for Multimodal Requests (https://github.com/vllm-project/vllm/issues/35665) (#35684) 2026-03-14 17:10:11 +03:00			`assert isinstance(data, dict)`
			`assert "token_ids" in data`
			`assert isinstance(data["token_ids"], list)`
			`assert len(data["token_ids"]) > 0`
[Frontend] Add render endpoints for prompt preprocessing (#32473) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> Signed-off-by: Hyunkyun Moon <mhg5303@gmail.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 2026-01-19 13:21:46 +09:00

			`@pytest.mark.asyncio`
[Frontend] Add vllm launch command for GPU-less preprocessing serving (#34551) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> 2026-03-05 03:41:52 +09:00			`async def test_chat_render_invalid_model(client):`
[Frontend] Add render endpoints for prompt preprocessing (#32473) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> Signed-off-by: Hyunkyun Moon <mhg5303@gmail.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 2026-01-19 13:21:46 +09:00			`response = await client.post(`
[Frontend] Add vllm launch command for GPU-less preprocessing serving (#34551) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> 2026-03-05 03:41:52 +09:00			`"/v1/chat/completions/render",`
[Frontend] Add render endpoints for prompt preprocessing (#32473) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> Signed-off-by: Hyunkyun Moon <mhg5303@gmail.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 2026-01-19 13:21:46 +09:00			`json={`
[Frontend] Add vllm launch command for GPU-less preprocessing serving (#34551) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> 2026-03-05 03:41:52 +09:00			`"model": "nonexistent-model",`
			`"messages": [{"role": "user", "content": "Hello"}],`
[Frontend] Add render endpoints for prompt preprocessing (#32473) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> Signed-off-by: Hyunkyun Moon <mhg5303@gmail.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 2026-01-19 13:21:46 +09:00			`},`
			`)`

[Frontend] Add vllm launch command for GPU-less preprocessing serving (#34551) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> 2026-03-05 03:41:52 +09:00			`assert response.status_code == 404`
			`assert "error" in response.json()`
[Frontend] Add render endpoints for prompt preprocessing (#32473) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> Signed-off-by: Hyunkyun Moon <mhg5303@gmail.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 2026-01-19 13:21:46 +09:00

[Frontend] Add vllm launch command for GPU-less preprocessing serving (#34551) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> 2026-03-05 03:41:52 +09:00			`# -- Completion Render --`
[Frontend] Add render endpoints for prompt preprocessing (#32473) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> Signed-off-by: Hyunkyun Moon <mhg5303@gmail.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 2026-01-19 13:21:46 +09:00

			`@pytest.mark.asyncio`
[Frontend] Add vllm launch command for GPU-less preprocessing serving (#34551) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> 2026-03-05 03:41:52 +09:00			`async def test_completion_render_basic(client):`
[Frontend] Add render endpoints for prompt preprocessing (#32473) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> Signed-off-by: Hyunkyun Moon <mhg5303@gmail.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 2026-01-19 13:21:46 +09:00			`response = await client.post(`
[Frontend] Add vllm launch command for GPU-less preprocessing serving (#34551) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> 2026-03-05 03:41:52 +09:00			`"/v1/completions/render",`
[Frontend] Add render endpoints for prompt preprocessing (#32473) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> Signed-off-by: Hyunkyun Moon <mhg5303@gmail.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 2026-01-19 13:21:46 +09:00			`json={`
			`"model": MODEL_NAME,`
[Frontend] Add vllm launch command for GPU-less preprocessing serving (#34551) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> 2026-03-05 03:41:52 +09:00			`"prompt": "Once upon a time",`
[Frontend] Add render endpoints for prompt preprocessing (#32473) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> Signed-off-by: Hyunkyun Moon <mhg5303@gmail.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 2026-01-19 13:21:46 +09:00			`},`
			`)`

			`assert response.status_code == 200`
			`data = response.json()`

[Frontend] Add vllm launch command for GPU-less preprocessing serving (#34551) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> 2026-03-05 03:41:52 +09:00			`assert isinstance(data, list)`
			`assert len(data) > 0`
[Frontend] Add render endpoints for prompt preprocessing (#32473) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> Signed-off-by: Hyunkyun Moon <mhg5303@gmail.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 2026-01-19 13:21:46 +09:00
[Frontend] Add vllm launch command for GPU-less preprocessing serving (#34551) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> 2026-03-05 03:41:52 +09:00			`first_prompt = data[0]`
[Bug] Fix Failure in /v1/chat/completions/render for Multimodal Requests (https://github.com/vllm-project/vllm/issues/35665) (#35684) 2026-03-14 17:10:11 +03:00			`assert "token_ids" in first_prompt`
			`assert "sampling_params" in first_prompt`
			`assert "model" in first_prompt`
			`assert "request_id" in first_prompt`
			`assert isinstance(first_prompt["token_ids"], list)`
			`assert len(first_prompt["token_ids"]) > 0`
			`assert first_prompt["request_id"].startswith("cmpl-")`
[Frontend] Add render endpoints for prompt preprocessing (#32473) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> Signed-off-by: Hyunkyun Moon <mhg5303@gmail.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 2026-01-19 13:21:46 +09:00

			`@pytest.mark.asyncio`
[Frontend] Add vllm launch command for GPU-less preprocessing serving (#34551) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> 2026-03-05 03:41:52 +09:00			`async def test_completion_render_multiple_prompts(client):`
[Frontend] Add render endpoints for prompt preprocessing (#32473) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> Signed-off-by: Hyunkyun Moon <mhg5303@gmail.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 2026-01-19 13:21:46 +09:00			`response = await client.post(`
			`"/v1/completions/render",`
			`json={`
[Frontend] Add vllm launch command for GPU-less preprocessing serving (#34551) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> 2026-03-05 03:41:52 +09:00			`"model": MODEL_NAME,`
			`"prompt": ["Hello world", "Goodbye world"],`
[Frontend] Add render endpoints for prompt preprocessing (#32473) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> Signed-off-by: Hyunkyun Moon <mhg5303@gmail.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 2026-01-19 13:21:46 +09:00			`},`
			`)`

[Frontend] Add vllm launch command for GPU-less preprocessing serving (#34551) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> 2026-03-05 03:41:52 +09:00			`assert response.status_code == 200`
[Frontend] Add render endpoints for prompt preprocessing (#32473) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> Signed-off-by: Hyunkyun Moon <mhg5303@gmail.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 2026-01-19 13:21:46 +09:00			`data = response.json()`
[Frontend] Add vllm launch command for GPU-less preprocessing serving (#34551) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> 2026-03-05 03:41:52 +09:00
			`assert isinstance(data, list)`
			`assert len(data) == 2`

			`for prompt in data:`
[Bug] Fix Failure in /v1/chat/completions/render for Multimodal Requests (https://github.com/vllm-project/vllm/issues/35665) (#35684) 2026-03-14 17:10:11 +03:00			`assert "token_ids" in prompt`
			`assert "sampling_params" in prompt`
			`assert "model" in prompt`
			`assert "request_id" in prompt`
			`assert len(prompt["token_ids"]) > 0`
			`assert prompt["request_id"].startswith("cmpl-")`
[Frontend] Add render endpoints for prompt preprocessing (#32473) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> Signed-off-by: Hyunkyun Moon <mhg5303@gmail.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 2026-01-19 13:21:46 +09:00

			`@pytest.mark.asyncio`
[Frontend] Add vllm launch command for GPU-less preprocessing serving (#34551) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> 2026-03-05 03:41:52 +09:00			`async def test_completion_render_invalid_model(client):`
[Frontend] Add render endpoints for prompt preprocessing (#32473) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> Signed-off-by: Hyunkyun Moon <mhg5303@gmail.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 2026-01-19 13:21:46 +09:00			`response = await client.post(`
[Frontend] Add vllm launch command for GPU-less preprocessing serving (#34551) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> 2026-03-05 03:41:52 +09:00			`"/v1/completions/render",`
[Frontend] Add render endpoints for prompt preprocessing (#32473) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> Signed-off-by: Hyunkyun Moon <mhg5303@gmail.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 2026-01-19 13:21:46 +09:00			`json={`
[Frontend] Add vllm launch command for GPU-less preprocessing serving (#34551) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> 2026-03-05 03:41:52 +09:00			`"model": "nonexistent-model",`
			`"prompt": "Hello",`
[Frontend] Add render endpoints for prompt preprocessing (#32473) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> Signed-off-by: Hyunkyun Moon <mhg5303@gmail.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 2026-01-19 13:21:46 +09:00			`},`
			`)`

			`assert response.status_code == 404`
[Frontend] Add vllm launch command for GPU-less preprocessing serving (#34551) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> 2026-03-05 03:41:52 +09:00			`assert "error" in response.json()`
[Frontend] Add render endpoints for prompt preprocessing (#32473) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> Signed-off-by: Hyunkyun Moon <mhg5303@gmail.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 2026-01-19 13:21:46 +09:00

			`@pytest.mark.asyncio`
[Frontend] Add vllm launch command for GPU-less preprocessing serving (#34551) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> 2026-03-05 03:41:52 +09:00			`async def test_render_is_fast(client):`
			`"""Render should complete quickly since there is no inference."""`
[Frontend] Add render endpoints for prompt preprocessing (#32473) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> Signed-off-by: Hyunkyun Moon <mhg5303@gmail.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> 2026-01-19 13:21:46 +09:00			`import time`

			`start = time.perf_counter()`
			`response = await client.post(`
			`"/v1/completions/render",`
			`json={`
			`"model": MODEL_NAME,`
			`"prompt": "Tell me a very long story about " * 10,`
			`},`
			`)`
			`elapsed = time.perf_counter() - start`

			`assert response.status_code == 200`
[Frontend] Add vllm launch command for GPU-less preprocessing serving (#34551) Signed-off-by: HyunKyun Moon <mhg5303@gmail.com> 2026-03-05 03:41:52 +09:00			`assert elapsed < 2.0`


			`# -- Health & Models --`


			`@pytest.mark.asyncio`
			`async def test_health_endpoint(client):`
			`response = await client.get("/health")`
			`assert response.status_code == 200`


			`@pytest.mark.asyncio`
			`async def test_models_endpoint(client):`
			`response = await client.get("/v1/models")`
			`assert response.status_code == 200`
			`data = response.json()`
			`assert "data" in data`
			`model_ids = [m["id"] for m in data["data"]]`
			`assert MODEL_NAME in model_ids`