[TPU][V1] Add support for top-logprobs (#17072)
Signed-off-by: NickLucche <nlucches@redhat.com>
This commit is contained in:
@@ -61,3 +61,51 @@ def test_sampler_different(model_name: str):
|
||||
# to have deterministic results over many tokens, tests the first ~20
|
||||
# tokens match.
|
||||
assert output[0].outputs[0].text[:20] == output[1].outputs[0].text[:20]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("model_name", ["Qwen/Qwen2.5-1.5B-Instruct"])
# TODO TPU will appear busy if we fan-out test params here
@pytest.mark.parametrize("n_prompts", [1])
@pytest.mark.skipif(not current_platform.is_tpu(),
                    reason="This test needs a TPU")
def test_logprobs(model_name: str, n_prompts: int):
    """
    Request top logprobs with different sampling settings and check
    that every generation step contains the requested number of entries,
    ordered by rank with non-increasing logprob values.
    """
    # Single source of truth for the number of top logprobs requested from
    # the engine and expected by the checker below.
    num_logprobs = 4

    def check_num_logprobs(logprobs, expected_num: int):
        """Validate the per-step logprob dicts of one completion.

        Each step must hold `expected_num` entries, or one more (see the
        length check below). When sorted by rank, logprob values must not
        increase, and the first `expected_num` ranks must be 1..expected_num.
        """
        for step in logprobs:
            # Can contain the sampled token
            assert len(step) == expected_num or len(step) == expected_num + 1

            # order by rank
            entries_by_rank = sorted(step.values(),
                                     key=lambda entry: entry.rank)

            # Log-probabilities are at most 0, so 1.0 is a safe initial
            # upper bound for the monotonicity check.
            prev_logp = 1.0
            # Check results are ordered by prob value
            for position, entry in enumerate(entries_by_rank, start=1):
                assert entry.logprob <= prev_logp
                prev_logp = entry.logprob
                if position <= expected_num:
                    # The requested top entries must have contiguous ranks.
                    assert entry.rank == position
                else:
                    # NOTE(review): the extra entry is presumably the sampled
                    # token; when it falls outside the top-`expected_num` its
                    # rank need not be contiguous, so only require that it is
                    # not smaller than its position. Confirm against the
                    # `Logprob.rank` semantics.
                    assert entry.rank >= position

    llm = LLM(model_name,
              enforce_eager=False,
              max_num_seqs=1,
              max_model_len=128,
              max_num_batched_tokens=128)
    prompts = [
        "Write a short story about a robot that dreams for the first time."
    ] * n_prompts
    greedy_sampling_params = SamplingParams(temperature=0.0,
                                            max_tokens=64,
                                            logprobs=num_logprobs)
    regular_sampling_params = SamplingParams(temperature=0.4,
                                             max_tokens=64,
                                             logprobs=num_logprobs)
    topkp_sampling_params = SamplingParams(temperature=0.4,
                                           max_tokens=64,
                                           logprobs=num_logprobs,
                                           top_k=12,
                                           top_p=0.5)

    for sp in [
            greedy_sampling_params,
            regular_sampling_params,
            topkp_sampling_params,
    ]:
        output = llm.generate(prompts, sp)
        for o in output:
            check_num_logprobs(o.outputs[0].logprobs, num_logprobs)
|
||||
|
||||
Reference in New Issue
Block a user