[V1][Spec Decode] Respect prompt_lookup_max (#15348)

Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
This commit is contained in:
Woosuk Kwon
2025-03-23 10:41:44 -07:00
committed by GitHub
parent 6ebaf9ac71
commit b9bd76ca14
3 changed files with 67 additions and 5 deletions

View File

@@ -2,7 +2,8 @@
import numpy as np
from vllm.v1.spec_decode.ngram_proposer import (_find_subarray_kmp,
from vllm.v1.spec_decode.ngram_proposer import (NgramProposer,
_find_subarray_kmp,
_kmp_lps_array)
@@ -35,3 +36,53 @@ def test_find_subarray_kmp():
# Return on the first match
np.testing.assert_array_equal(_find_subarray_kmp(X, 1, 3),
np.array([6, 2, 3]))
def test_ngram_proposer():
proposer = NgramProposer()
# No match.
result = proposer.propose(
context_token_ids=np.array([1, 2, 3, 4, 5]),
min_n=2,
max_n=2,
k=2,
)
assert result is None
# No match for 4-gram.
result = proposer.propose(
context_token_ids=np.array([1, 2, 3, 4, 1, 2, 3]),
min_n=4,
max_n=4,
k=2,
)
assert result is None
# No match for 4-gram but match for 3-gram.
result = proposer.propose(
context_token_ids=np.array([1, 2, 3, 4, 1, 2, 3]),
min_n=3,
max_n=4,
k=2,
)
assert np.array_equal(result, np.array([4, 1]))
# Match for both 4-gram and 3-gram.
# In this case, the proposer should return the 4-gram match.
result = proposer.propose(
context_token_ids=np.array([2, 3, 4, 5, 1, 2, 3, 4, 1, 2, 3, 4]),
min_n=3,
max_n=4,
k=2,
)
assert np.array_equal(result, np.array([1, 2])) # Not [5, 1]
# Match for 2-gram and 3-gram, but not 4-gram.
result = proposer.propose(
context_token_ids=np.array([3, 4, 5, 2, 3, 4, 1, 2, 3, 4]),
min_n=2,
max_n=4,
k=2,
)
assert np.array_equal(result, np.array([1, 2])) # Not [5, 2]