[V0][V1][Core] Add outlines integration for V1, and update V0 integration. (#15975)

Signed-off-by: Nathan Hoos <thwackyy.y@gmail.com>
2025-07-10 14:30:26 -05:00
parent 5e53c89a74
commit d6902ce79f
13 changed files with 804 additions and 461 deletions
--- a/tests/model_executor/test_guided_processors.py
+++ b/tests/model_executor/test_guided_processors.py
@@ -46,20 +46,15 @@ def test_guided_logits_processors(zephyr_7B_tokenzer, sample_regex,
                                  whitespace_pattern=None,
                                  reasoner=None)

-    token_ids = zephyr_7B_tokenzer.encode(
-        f"Give an example IPv4 address with this regex: {sample_regex}")
    tensor = torch.rand(32000)
    original_tensor = torch.clone(tensor)
-    regex_LP(token_ids, tensor)
+    tensor = regex_LP([], tensor)
    assert tensor.shape == original_tensor.shape
    assert not torch.allclose(tensor, original_tensor)

-    token_ids = zephyr_7B_tokenzer.encode(
-        f"Give an employee profile that fits this schema: {sample_json_schema}"
-    )
    tensor = torch.rand(32000)
    original_tensor = torch.clone(tensor)
-    json_LP(token_ids, tensor)
+    tensor = json_LP([], tensor)
    assert tensor.shape == original_tensor.shape
    assert not torch.allclose(tensor, original_tensor)

@@ -81,8 +76,6 @@ async def test_guided_logits_processor_black_box(backend: str, is_local: bool,
        seed=0,
        dtype="bfloat16",
    )
-    token_ids = zephyr_7B_tokenzer.encode(
-        f"Give an example IPv4 address with this regex: {sample_regex}")
    regex_request = GuidedDecodingParams(regex=sample_regex, backend=backend)

    regex_lp = get_local_guided_decoding_logits_processor(
@@ -92,13 +85,11 @@ async def test_guided_logits_processor_black_box(backend: str, is_local: bool,
    assert regex_lp is not None
    tensor = torch.rand(32000)
    original_tensor = torch.clone(tensor)
-    tensor = regex_lp(token_ids, tensor)
+    # no token history is passed, so this masks to the allowed tokens at state 0
+    tensor = regex_lp([], tensor)
    assert tensor.shape == original_tensor.shape
    assert not torch.allclose(tensor, original_tensor)

-    token_ids = zephyr_7B_tokenzer.encode(
-        f"Give an employee profile that fits this schema: {sample_json_schema}"
-    )
    json_request = GuidedDecodingParams(json=sample_json_schema,
                                        backend=backend)
    json_lp = await get_guided_decoding_logits_processor(
@@ -106,7 +97,7 @@ async def test_guided_logits_processor_black_box(backend: str, is_local: bool,
    assert json_lp is not None
    tensor = torch.rand(32000)
    original_tensor = torch.clone(tensor)
-    tensor = json_lp(token_ids, tensor)
+    tensor = json_lp([], tensor)
    assert tensor.shape == original_tensor.shape
    assert not torch.allclose(tensor, original_tensor)

@@ -130,7 +121,6 @@ async def test_guided_logits_processor_with_reasoning(
        dtype="bfloat16",
    )
    token_ids = deepseek_r1_qwen_tokenizer.encode(
-        f"Give an example IPv4 address with this regex: {sample_regex}."
        "<think>here is the thinking process")
    regex_request = GuidedDecodingParams(regex=sample_regex, backend=backend)

@@ -141,14 +131,13 @@ async def test_guided_logits_processor_with_reasoning(
                    regex_request, deepseek_r1_qwen_tokenizer, config,
                    reasoning_backend)
    assert regex_lp is not None
-    tensor = torch.rand(32000)
+    tensor = torch.rand(151664)
    original_tensor = torch.clone(tensor)
    tensor = regex_lp(token_ids, tensor)
    assert tensor.shape == original_tensor.shape
    assert torch.allclose(tensor, original_tensor)

    token_ids = deepseek_r1_qwen_tokenizer.encode(
-        f"Give an employee profile that fits this schema: {sample_json_schema}."
        "<think>here is the thinking process")
    json_request = GuidedDecodingParams(json=sample_json_schema,
                                        backend=backend)
@@ -158,7 +147,7 @@ async def test_guided_logits_processor_with_reasoning(
        await get_guided_decoding_logits_processor(
            json_request, deepseek_r1_qwen_tokenizer, config, reasoning_backend)
    assert json_lp is not None
-    tensor = torch.rand(32000)
+    tensor = torch.rand(151664)
    original_tensor = torch.clone(tensor)
    tensor = json_lp(token_ids, tensor)
    assert tensor.shape == original_tensor.shape
@@ -166,8 +155,7 @@ async def test_guided_logits_processor_with_reasoning(

    # Thinking is over, so the tensor should change.
    token_ids = deepseek_r1_qwen_tokenizer.encode(
-        f"Give an employee profile that fits this schema: {sample_json_schema}."
-        "<think>here is the thinking process</think> Then")
+        "<think>here is the thinking process</think>")
    json_request = GuidedDecodingParams(json=sample_json_schema,
                                        backend=backend)
    json_lp = get_local_guided_decoding_logits_processor(
@@ -176,7 +164,7 @@ async def test_guided_logits_processor_with_reasoning(
        await get_guided_decoding_logits_processor(
            json_request, deepseek_r1_qwen_tokenizer, config, reasoning_backend)
    assert json_lp is not None
-    tensor = torch.rand(32000)
+    tensor = torch.rand(151664)
    original_tensor = torch.clone(tensor)
    tensor = json_lp(token_ids, tensor)
    assert tensor.shape == original_tensor.shape