Deprecate best_of Sampling Parameter in anticipation of vLLM V1 (#13997)

Signed-off-by: vincent-4 <vincentzhongy+githubvincent4@gmail.com> Signed-off-by: Brayden Zhong <b8zhong@uwaterloo.ca> Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> Co-authored-by: Brayden Zhong <b8zhong@uwaterloo.ca> Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
2025-03-05 15:22:43 -05:00
parent a32c8669ca
commit a4f1ee35d6
12 changed files with 16 additions and 88 deletions
--- a/vllm/entrypoints/openai/serving_completion.py
+++ b/vllm/entrypoints/openai/serving_completion.py
@@ -168,12 +168,8 @@ class OpenAIServingCompletion(OpenAIServing):
        model_name = self._get_model_name(request.model, lora_request)
        num_prompts = len(engine_prompts)

-        # Similar to the OpenAI API, when n != best_of, we do not stream the
-        # results. In addition, we do not stream the results when use
-        # beam search.
-        stream = (request.stream
-                  and (request.best_of is None or request.n == request.best_of)
-                  and not request.use_beam_search)
+        # We do not stream the results when using beam search.
+        stream = (request.stream and not request.use_beam_search)

        # Streaming response
        if stream: