[Frontend] Add backend-specific options for guided decoding (#13505)

Signed-off-by: Joe Runde <Joseph.Runde@ibm.com>
This commit is contained in:
Joe Runde
2025-02-20 13:07:58 -07:00
committed by GitHub
parent 6a417b8600
commit bfbc0b32c6
8 changed files with 123 additions and 42 deletions

View File

@@ -2,7 +2,7 @@
from enum import Enum
from openai import OpenAI
from openai import BadRequestError, OpenAI
from pydantic import BaseModel
client = OpenAI(
@@ -94,3 +94,26 @@ completion = client.chat.completions.create(
extra_body={"guided_grammar": simplified_sql_grammar},
)
print(completion.choices[0].message.content)
# Extra backend options
#
# Sends a guided-regex chat completion while pinning the guided decoding
# backend to "xgrammar:no-fallback", and prints the resulting 400 error.
prompt = ("Generate an email address for Alan Turing, who works in Enigma. "
          "End in .com and new line. Example result: "
          "alan.turing@enigma.com\n")

try:
    # The no-fallback option forces vLLM to use xgrammar, so when it fails
    # you get a 400 with the reason why
    completion = client.chat.completions.create(
        model="Qwen/Qwen2.5-3B-Instruct",
        messages=[{
            "role": "user",
            "content": prompt,
        }],
        extra_body={
            # Backslashes are escaped explicitly so Python does not see the
            # invalid escape sequences "\w" and "\."; the trailing "\n" is
            # still a real newline character, exactly as before.
            "guided_regex": "\\w+@\\w+\\.com\n",
            "stop": ["\n"],
            "guided_decoding_backend": "xgrammar:no-fallback"
        },
    )
except BadRequestError as e:
    # The server rejects the request; surface the reason it reports.
    print("This error is expected:", e)