# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# ruff: noqa
# type: ignore
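"""End-to-end test that generating with vLLM exports an OpenTelemetry
"llm_request" span whose attributes match the request's sampling parameters,
token usage, and latency metrics."""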
import pytest
import time

from opentelemetry.sdk.environment_variables import OTEL_EXPORTER_OTLP_TRACES_INSECURE

from vllm import LLM, SamplingParams
from vllm.tracing import SpanAttributes

# Import shared fixtures from the tracing conftest
from tests.tracing.conftest import (  # noqa: F401
    FAKE_TRACE_SERVER_ADDRESS,
    FakeTraceService,
    trace_service,
)


def test_traces(
    monkeypatch: pytest.MonkeyPatch,
    trace_service: FakeTraceService,
):
    with monkeypatch.context() as m:
        m.setenv(OTEL_EXPORTER_OTLP_TRACES_INSECURE, "true")
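        # Near-greedy sampling (very low temperature and top_p) is assumed
        # here to keep the generated token counts stable across runs.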
        sampling_params = SamplingParams(
            temperature=0.01,
            top_p=0.1,
            max_tokens=256,
        )
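        # A small model and modest gpu_memory_utilization keep the test light;
        # any model would exercise the same tracing path (assumed rationale).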
        model = "facebook/opt-125m"
        llm = LLM(
            model=model,
            otlp_traces_endpoint=FAKE_TRACE_SERVER_ADDRESS,
            gpu_memory_utilization=0.3,
            disable_log_stats=False,
        )
        prompts = ["This is a short prompt"]
        outputs = llm.generate(prompts, sampling_params=sampling_params)
        print(f"test_traces outputs: {outputs}")
        # Wait for the "llm_request" span to be exported.
        # The BatchSpanProcessor batches spans and exports them periodically,
        # so we need to wait specifically for the llm_request span to appear.
        timeout = 15
        deadline = time.time() + timeout
        llm_request_spans = []
        while time.time() < deadline:
            all_spans = trace_service.get_all_spans()
            llm_request_spans = [s for s in all_spans if s["name"] == "llm_request"]
            if llm_request_spans:
                break
            time.sleep(0.5)

        assert len(llm_request_spans) == 1, (
            f"Expected exactly 1 'llm_request' span, but got {len(llm_request_spans)}. "
            f"All span names: {[s['name'] for s in all_spans]}"
        )
        attributes = llm_request_spans[0]["attributes"]
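        # The span's request attributes should echo the request ID and the
        # sampling parameters sent above.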
        # assert attributes.get(SpanAttributes.GEN_AI_RESPONSE_MODEL) == model
        assert attributes.get(SpanAttributes.GEN_AI_REQUEST_ID) == outputs[0].request_id
        assert (
            attributes.get(SpanAttributes.GEN_AI_REQUEST_TEMPERATURE)
            == sampling_params.temperature
        )
        assert (
            attributes.get(SpanAttributes.GEN_AI_REQUEST_TOP_P) == sampling_params.top_p
        )
        assert (
            attributes.get(SpanAttributes.GEN_AI_REQUEST_MAX_TOKENS)
            == sampling_params.max_tokens
        )
        assert attributes.get(SpanAttributes.GEN_AI_REQUEST_N) == sampling_params.n
        assert attributes.get(SpanAttributes.GEN_AI_USAGE_PROMPT_TOKENS) == len(
            outputs[0].prompt_token_ids
        )
        completion_tokens = sum(len(o.token_ids) for o in outputs[0].outputs)
        assert (
            attributes.get(SpanAttributes.GEN_AI_USAGE_COMPLETION_TOKENS)
            == completion_tokens
        )
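
        # Latency attributes are assumed to be positive for any completed
        # request; only the sign is checked, not the magnitude.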
        assert attributes.get(SpanAttributes.GEN_AI_LATENCY_TIME_IN_QUEUE) > 0
        assert attributes.get(SpanAttributes.GEN_AI_LATENCY_TIME_TO_FIRST_TOKEN) > 0
        assert attributes.get(SpanAttributes.GEN_AI_LATENCY_E2E) > 0