[Bugfix] Fix byte fallback handling when using outlines (#31391)

Signed-off-by: Shinichi Hemmi <50256998+Alnusjaponica@users.noreply.github.com> Co-authored-by: Kenichi Maehashi <maehashi@preferred.jp>
2026-01-21 04:48:08 +09:00
parent 7c5dedc247
commit 86c69dc54c
2 changed files with 9 additions and 2 deletions
--- a/vllm/v1/structured_output/backend_outlines.py
+++ b/vllm/v1/structured_output/backend_outlines.py
@@ -122,7 +122,12 @@ class OutlinesGrammar(StructuredOutputGrammar):
        Returns False if the FSM failed to advance.
        """
        if self.guide.accepts_tokens(tokens):
-            # Advance cannot fail because we checked Guide.accepts_tokens()
+            # Advance can still fail even though accepts_tokens() succeeded.
+            # Guide.accepts_tokens() only checks whether the given tokens can
+            # be accepted, whereas Guide.advance() additionally checks the
+            # state reached after all tokens have been consumed and fails
+            # if that state is a dead state.
+            # Note that the FSM must therefore be built without dead states.
            for t in tokens:
                self.guide.advance(t)
                self.num_processed_tokens += 1
--- a/vllm/v1/structured_output/utils.py
+++ b/vllm/v1/structured_output/utils.py
@@ -226,7 +226,9 @@ def _reduced_vocabulary(
                # by this point.
                token_bytes = bytes(token_str)  # type: ignore[arg-type]

-            elif "\ufffd" in token_str and not re_replacement_seq.match(token_str):
+            elif (token_str == "\ufffd" and token != "\ufffd") or (
+                "\ufffd" in token_str and not re_replacement_seq.match(token_str)
+            ):
                # Handle tokens with invalid UTF-8 sequences.
                if re_llama_byte_token.match(token):
                    # Llama-like tokenizers use <0xXX> for incomplete sequences.