[Deprecation] Remove `prompt_token_ids` arg fallback in `LLM.generate` and `LLM.encode` (#18800)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2025-08-22 10:56:57 +08:00
committed by GitHub
parent 19fe1a0510
commit 8896eb72eb
24 changed files with 116 additions and 467 deletions

View File

@@ -127,13 +127,15 @@ def test_structured_output(
temperature=1.0,
max_tokens=4096,
guided_decoding=GuidedDecodingParams(json=sample_json_schema))
outputs = llm.generate(prompts=[
(f"Give an example JSON for an employee profile that fits this "
f"schema. Make the response as short as possible. Schema: "
f"{sample_json_schema}")
] * 2,
sampling_params=sampling_params,
use_tqdm=True)
prompt = ("Give an example JSON for an employee profile that fits this "
"schema. Make the response as short as possible. Schema: "
f"{sample_json_schema}")
outputs = llm.generate(
[prompt] * 2,
sampling_params=sampling_params,
use_tqdm=True,
)
assert outputs is not None
@@ -191,20 +193,24 @@ def test_structured_output(
with pytest.raises(ValueError,
match="The provided JSON schema contains features "
"not supported by xgrammar."):
prompt = (f"Give an example JSON for an employee profile that "
f"fits this schema: {unsupported_json_schema}. "
f"Make the response as short as possible.")
llm.generate(
prompts=[(f"Give an example JSON for an employee profile that "
f"fits this schema: {unsupported_json_schema}. "
f"Make the response as short as possible.")] * 2,
[prompt] * 2,
sampling_params=sampling_params,
use_tqdm=True)
use_tqdm=True,
)
else:
outputs = llm.generate(prompts=(
"Give an example JSON object for a grade "
"that fits this schema: "
f"{unsupported_json_schema}. Make the response as short as "
"possible."),
sampling_params=sampling_params,
use_tqdm=True)
prompt = (f"Give an example JSON object for a grade that "
f"fits this schema: {unsupported_json_schema}. "
f"Make the response as short as possible.")
outputs = llm.generate(
prompt,
sampling_params=sampling_params,
use_tqdm=True,
)
assert outputs is not None
for output in outputs:
assert output is not None
@@ -227,10 +233,9 @@ def test_structured_output(
max_tokens=1000,
guided_decoding=GuidedDecodingParams(grammar=sample_sql_ebnf))
outputs = llm.generate(
prompts=(
"Generate a sql statement that selects col_1 from "
"table_1 where it is equal to 1. Make the response as short as "
"possible."),
("Generate a sql statement that selects col_1 from "
"table_1 where it is equal to 1. Make the response as short as "
"possible."),
sampling_params=sampling_params,
use_tqdm=True,
)
@@ -261,10 +266,9 @@ def test_structured_output(
max_tokens=1000,
guided_decoding=GuidedDecodingParams(grammar=sample_sql_lark))
outputs = llm.generate(
prompts=(
"Generate a sql statement that selects col_1 from "
"table_1 where it is equal to 1. Make the response as short as "
"possible."),
("Generate a sql statement that selects col_1 from "
"table_1 where it is equal to 1. Make the response as short as "
"possible."),
sampling_params=sampling_params,
use_tqdm=True,
)
@@ -301,7 +305,6 @@ def test_structured_output(
guided_decoding=GuidedDecodingParams(grammar="not a grammar"))
with pytest.raises(ValueError, match="Failed to convert the grammar "):
llm.generate(
prompts=
("Generate a sql statement that selects col_1 from "
"table_1 where it is equal to 1. Make the response as short "
"as possible."),
@@ -316,11 +319,11 @@ def test_structured_output(
temperature=0.8,
top_p=0.95,
guided_decoding=GuidedDecodingParams(regex=sample_regex))
prompt = (f"Give an example IPv4 address with this regex: {sample_regex}. "
f"Make the response as short as possible.")
outputs = llm.generate(
prompts=[
(f"Give an example IPv4 address with this regex: {sample_regex}. "
f"Make the response as short as possible.")
] * 2,
[prompt] * 2,
sampling_params=sampling_params,
use_tqdm=True,
)
@@ -343,11 +346,13 @@ def test_structured_output(
temperature=0.8,
top_p=0.95,
guided_decoding=GuidedDecodingParams(choice=sample_guided_choice))
outputs = llm.generate(
prompts=("The best language for type-safe systems programming is "
"(Make the response as short as possible.) "),
("The best language for type-safe systems programming is "
"(Make the response as short as possible.) "),
sampling_params=sampling_params,
use_tqdm=True)
use_tqdm=True,
)
assert outputs is not None
for output in outputs:
assert output is not None
@@ -367,12 +372,14 @@ def test_structured_output(
temperature=1.0,
max_tokens=1000,
guided_decoding=GuidedDecodingParams(json=json_schema))
outputs = llm.generate(prompts=(
"Generate a JSON with the brand, model and car_type of the most "
"iconic car from the 90's. Make the response as short as "
"possible."),
sampling_params=sampling_params,
use_tqdm=True)
outputs = llm.generate(
("Generate a JSON with the brand, model and car_type of the most "
"iconic car from the 90's. Make the response as short as "
"possible."),
sampling_params=sampling_params,
use_tqdm=True,
)
assert outputs is not None
@@ -411,10 +418,11 @@ def test_structured_output(
guided_decoding=GuidedDecodingParams(json=json_schema))
outputs = llm.generate(
prompts=("Generate a description of a frog using 50 characters. "
"Make the response as short as possible."),
("Generate a description of a frog using 50 characters. "
"Make the response as short as possible."),
sampling_params=sampling_params,
use_tqdm=True)
use_tqdm=True,
)
assert outputs is not None
@@ -498,7 +506,7 @@ Make the response as short as possible.
"""
# Change this once other backends support structural_tag
outputs = llm.generate(prompts=prompt,
outputs = llm.generate(prompt,
sampling_params=sampling_params,
use_tqdm=True)
assert outputs is not None
@@ -639,15 +647,13 @@ def test_structured_output_auto_mode(
f"{unsupported_json_schema}. Make the response as short as possible.")
# This would fail with the default of "xgrammar", but in "auto"
# we will handle fallback automatically.
outputs = llm.generate(prompts=prompts,
outputs = llm.generate(prompts,
sampling_params=sampling_params,
use_tqdm=True)
# Make sure `auto` backend handling doesn't mess up sampling_params
# and that we can reuse it without error.
outputs.extend(
llm.generate(prompts=prompts,
sampling_params=sampling_params,
use_tqdm=True))
llm.generate(prompts, sampling_params=sampling_params, use_tqdm=True))
assert outputs is not None
for output in outputs:
@@ -705,7 +711,7 @@ def test_guidance_no_additional_properties(monkeypatch: pytest.MonkeyPatch):
max_tokens=256,
guided_decoding=guided_params)
outputs = llm.generate(prompts=prompt, sampling_params=sampling_params)
outputs = llm.generate(prompt, sampling_params=sampling_params)
assert outputs is not None
generated_text = outputs[0].outputs[0].text
assert generated_text is not None