[Bug fix][Core] Fix `assert num_new_tokens == 1` failure when `SamplingParams.n` is not 1 and `max_tokens` is large; add tests for preemption (#4451)

This commit is contained in:
SangBin Cho
2024-05-02 11:24:13 +09:00
committed by GitHub
parent b8afa8b95a
commit 0d62fe58db
6 changed files with 172 additions and 13 deletions

View File

@@ -55,7 +55,6 @@ def test_models(
)
vllm_outputs = vllm_model.generate_greedy(example_prompts, max_tokens)
del vllm_model
print(vllm_outputs[0])
for i in range(len(example_prompts)):
hf_output_ids, hf_output_str = hf_outputs[i]