[Core][Performance] Add XGrammar support for guided decoding and set it as default (#10785)

Signed-off-by: Aaron Pham <contact@aarnphm.xyz> Signed-off-by: mgoin <michael@neuralmagic.com> Co-authored-by: mgoin <michael@neuralmagic.com>
2024-12-03 02:17:00 -05:00
parent 3257d449fa
commit 9323a3153b
11 changed files with 385 additions and 33 deletions
--- a/tests/entrypoints/llm/test_guided_generate.py
+++ b/tests/entrypoints/llm/test_guided_generate.py
@@ -159,3 +159,30 @@ def test_validation_against_both_guided_decoding_options(sample_regex, llm):
                     sampling_params=sampling_params,
                     use_tqdm=True,
                     guided_options_request=dict(guided_regex=sample_regex))
+
+
+@pytest.mark.skip_global_cleanup
+def test_guided_json_object(llm):
+    sampling_params = SamplingParams(
+        temperature=1.0,
+        max_tokens=100,
+        guided_decoding=GuidedDecodingParams(json_object=True))
+
+    outputs = llm.generate(
+        prompts=("Generate a JSON object describing a person with name "
+                 "and age for John Smith who is 31 years old."),
+        sampling_params=sampling_params,
+        use_tqdm=True)
+
+    assert outputs is not None
+    for output in outputs:
+        assert output is not None
+        assert isinstance(output, RequestOutput)
+
+        generated_text = output.outputs[0].text
+        print(generated_text)
+        assert generated_text is not None
+
+        # Parse to verify it is valid JSON
+        parsed_json = json.loads(generated_text)
+        assert isinstance(parsed_json, dict)