[V1] feat: add engine v1 tracing (#20372)

Signed-off-by: Mu Huai <tianbowen.tbw@antgroup.com>
Signed-off-by: Ye Zhang <zhysishu@gmail.com>
Signed-off-by: RichardoMu <44485717+RichardoMrMu@users.noreply.github.com>
Signed-off-by: simon-mo <simon.mo@hey.com>
Signed-off-by: Aaron Pham <contact@aarnphm.xyz>
Signed-off-by: 22quinn <33176974+22quinn@users.noreply.github.com>
Co-authored-by: Mu Huai <tianbowen.tbw@antgroup.com>
Co-authored-by: Ye Zhang <zhysishu@gmail.com>
Co-authored-by: Benjamin Bartels <benjamin@bartels.dev>
Co-authored-by: simon-mo <simon.mo@hey.com>
Co-authored-by: 瑜琮 <ly186375@antfin.com>
Co-authored-by: Aaron Pham <contact@aarnphm.xyz>
Co-authored-by: 22quinn <33176974+22quinn@users.noreply.github.com>
This commit is contained in:
RichardoMu
2025-09-12 08:10:39 +08:00
committed by GitHub
parent 2e6bc46821
commit 40b6c9122b
12 changed files with 253 additions and 20 deletions

View File

@@ -327,8 +327,6 @@ class Processor:
# TODO(woosuk): Support pooling models.
self._validate_lora(lora_request)
self._validate_params(params, lora_request)
if trace_headers is not None:
raise ValueError("V1 does not support tracing yet.")
data_parallel_size = self.vllm_config.parallel_config.data_parallel_size
if data_parallel_rank is not None and not (0 <= data_parallel_rank <
@@ -435,6 +433,7 @@ class Processor:
cache_salt=decoder_inputs.get("cache_salt"),
priority=priority,
data_parallel_rank=data_parallel_rank,
trace_headers=trace_headers,
)
def _validate_model_inputs(self,