[Perf] Optimize Python Slice for Structured Output using islice instead of [:] (#33593)

Signed-off-by: yewentao256 <zhyanwentao@126.com>
Signed-off-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
Wentao Ye
2026-02-24 12:02:36 -05:00
committed by GitHub
parent c38b8d5a31
commit 9ce8fad2a9
8 changed files with 23 additions and 16 deletions

View File

@@ -4,7 +4,7 @@
import importlib
import os
from abc import abstractmethod
from collections.abc import Callable, Sequence
from collections.abc import Callable, Iterable, Sequence
from functools import cached_property
from typing import TYPE_CHECKING, Any
@@ -68,7 +68,7 @@ class ReasoningParser:
"""
def is_reasoning_end_streaming(
self, input_ids: Sequence[int], delta_ids: Sequence[int]
self, input_ids: Sequence[int], delta_ids: Iterable[int]
) -> bool:
"""
Check if the reasoning content ends in the input_ids on a

View File

@@ -2,7 +2,8 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from abc import abstractmethod
from collections.abc import Sequence
from collections.abc import Iterable, Sequence
from itertools import islice
from typing import TYPE_CHECKING, Any
from vllm.entrypoints.openai.engine.protocol import DeltaMessage
@@ -77,7 +78,7 @@ class BaseThinkingReasoningParser(ReasoningParser):
return False
def is_reasoning_end_streaming(
self, input_ids: Sequence[int], delta_ids: Sequence[int]
self, input_ids: Sequence[int], delta_ids: Iterable[int]
) -> bool:
end_token_id = self.end_token_id
return end_token_id in delta_ids
@@ -86,7 +87,7 @@ class BaseThinkingReasoningParser(ReasoningParser):
"""
Extract the content after the end tokens
"""
if self.end_token_id not in input_ids[:-1]:
if self.end_token_id not in islice(input_ids, 0, max(0, len(input_ids) - 1)):
return []
else:
return input_ids[input_ids.index(self.end_token_id) + 1 :]

View File

@@ -1,7 +1,7 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from collections.abc import Sequence
from collections.abc import Iterable, Sequence
from transformers import PreTrainedTokenizerBase
@@ -41,7 +41,7 @@ class DeepSeekV3ReasoningParser(ReasoningParser):
return self._parser.is_reasoning_end(input_ids)
def is_reasoning_end_streaming(
self, input_ids: Sequence[int], delta_ids: Sequence[int]
self, input_ids: Sequence[int], delta_ids: Iterable[int]
) -> bool:
return self._parser.is_reasoning_end_streaming(input_ids, delta_ids)

View File

@@ -1,7 +1,7 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from collections.abc import Sequence
from collections.abc import Iterable, Sequence
from transformers import PreTrainedTokenizerBase
@@ -36,7 +36,7 @@ class IdentityReasoningParser(ReasoningParser):
return True
def is_reasoning_end_streaming(
self, input_ids: Sequence[int], delta_ids: Sequence[int]
self, input_ids: Sequence[int], delta_ids: Iterable[int]
) -> bool:
return True

View File

@@ -69,7 +69,7 @@ class MistralReasoningParser(BaseThinkingReasoningParser):
def is_reasoning_end(self, input_ids: Sequence[int]) -> bool:
has_eot_token = False
for id in input_ids[::-1]:
for id in reversed(input_ids):
if id == self.start_token_id:
# Reasoning ends only if a BOT token is found before a EOT token.
return has_eot_token

View File

@@ -1,7 +1,8 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from collections.abc import Sequence
from collections.abc import Iterable, Sequence
from itertools import islice
import regex as re
from transformers import PreTrainedTokenizerBase
@@ -104,13 +105,15 @@ class Step3ReasoningParser(ReasoningParser):
return self.think_end_token_id in input_ids
def is_reasoning_end_streaming(
self, input_ids: Sequence[int], delta_ids: Sequence[int]
self, input_ids: Sequence[int], delta_ids: Iterable[int]
) -> bool:
end_token_id = self.think_end_token_id
return end_token_id in delta_ids
def extract_content_ids(self, input_ids: list[int]) -> list[int]:
if self.think_end_token_id not in input_ids[:-1]:
if self.think_end_token_id not in islice(
input_ids, 0, max(0, len(input_ids) - 1)
):
return []
else:
return input_ids[input_ids.index(self.think_end_token_id) + 1 :]

View File

@@ -1,7 +1,7 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from collections.abc import Sequence
from collections.abc import Iterable, Sequence
from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionRequest,
@@ -51,7 +51,7 @@ class Step3p5ReasoningParser(BaseThinkingReasoningParser):
return self.end_offset < 1
def is_reasoning_end_streaming(
self, input_ids: Sequence[int], delta_ids: Sequence[int]
self, input_ids: Sequence[int], delta_ids: Iterable[int]
) -> bool:
if self.end_token_id in input_ids and self.end_offset > 0:
self.end_offset -= 1

View File

@@ -324,8 +324,11 @@ class StructuredOutputManager:
# Check if reasoning ends in *this* step
delta_from = request.num_computed_tokens - request.num_output_placeholders
all_token_ids = request.all_token_ids
start = (
delta_from if delta_from >= 0 else max(len(all_token_ids) + delta_from, 0)
)
if self.reasoner.is_reasoning_end_streaming(
all_token_ids, all_token_ids[delta_from:]
all_token_ids, itertools.islice(all_token_ids, start, None)
):
# Reasoning just ended, so we shouldn't advance til
# next pass