Deprecate best_of Sampling Parameter in anticipation for vLLM V1 (#13997)

Signed-off-by: vincent-4 <vincentzhongy+githubvincent4@gmail.com> Signed-off-by: Brayden Zhong <b8zhong@uwaterloo.ca> Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> Co-authored-by: Brayden Zhong <b8zhong@uwaterloo.ca> Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
2025-03-05 15:22:43 -05:00
parent a32c8669ca
commit a4f1ee35d6
12 changed files with 16 additions and 88 deletions
--- a/tests/core/test_scheduler.py
+++ b/tests/core/test_scheduler.py
@@ -617,7 +617,6 @@ def test_schedule_decode_blocks_to_copy_update():
                                     num_gpu_blocks=16)
    _, seq_group = create_dummy_prompt("1",
                                       prompt_length=60,
-                                       best_of=2,
                                       block_size=block_size)
    curr_loras = None
    scheduler._allocate_and_set_running(seq_group)
@@ -686,7 +685,6 @@ def test_schedule_swapped_cannot_swap_in():
    for i in range(2):
        _, seq_group = create_dummy_prompt(str(i),
                                           prompt_length=60,
-                                           best_of=2,
                                           block_size=block_size)
        scheduler._allocate_and_set_running(seq_group)
        append_new_token_seq_group(60, seq_group, 1)
@@ -717,7 +715,6 @@ def test_infeasible_swap():
    for i in range(2):
        _, seq_group = create_dummy_prompt(str(i),
                                           prompt_length=60,
-                                           best_of=2,
                                           block_size=block_size)
        scheduler._allocate_and_set_running(seq_group)
        append_new_token_seq_group(60, seq_group, 1)
@@ -747,7 +744,6 @@ def test_schedule_swapped_blocks_to_copy():
    curr_loras = None
    _, seq_group = create_dummy_prompt("1",
                                       prompt_length=60,
-                                       best_of=2,
                                       block_size=block_size)
    scheduler._allocate_and_set_running(seq_group)
    append_new_token_seq_group(60, seq_group, 1)