[Frontend] Add backend-specific options for guided decoding (#13505)

Signed-off-by: Joe Runde <Joseph.Runde@ibm.com>
2025-02-20 13:07:58 -07:00
parent 6a417b8600
commit bfbc0b32c6
8 changed files with 123 additions and 42 deletions
--- a/examples/online_serving/openai_chat_completion_structured_outputs.py
+++ b/examples/online_serving/openai_chat_completion_structured_outputs.py
@@ -2,7 +2,7 @@

 from enum import Enum

-from openai import OpenAI
+from openai import BadRequestError, OpenAI
 from pydantic import BaseModel

 client = OpenAI(
@@ -94,3 +94,26 @@ completion = client.chat.completions.create(
    extra_body={"guided_grammar": simplified_sql_grammar},
 )
 print(completion.choices[0].message.content)
+
+# Extra backend options: an option is appended to the backend name after a
+# colon, as in "xgrammar:no-fallback" below.
+prompt = ("Generate an email address for Alan Turing, who works in Enigma. "
+          "End in .com and new line. Example result: "
+          "alan.turing@enigma.com\n")
+
+try:
+    # The no-fallback option forces vLLM to use xgrammar, so when it fails
+    # you get a 400 with the reason why
+    completion = client.chat.completions.create(
+        model="Qwen/Qwen2.5-3B-Instruct",
+        messages=[{"role": "user", "content": prompt}],
+        extra_body={
+            # Raw string so that \w, \. and \n reach the server as regex
+            # escapes instead of relying on invalid Python string escapes.
+            "guided_regex": r"\w+@\w+\.com\n",
+            "stop": ["\n"],
+            "guided_decoding_backend": "xgrammar:no-fallback"
+        },
+    )
+except BadRequestError as e:
+    print("This error is expected:", e)