Deprecate best_of Sampling Parameter in anticipation for vLLM V1 (#13997)

Signed-off-by: vincent-4 <vincentzhongy+githubvincent4@gmail.com>
Signed-off-by: Brayden Zhong <b8zhong@uwaterloo.ca>
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Co-authored-by: Brayden Zhong <b8zhong@uwaterloo.ca>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
Vincent
2025-03-05 15:22:43 -05:00
committed by GitHub
parent a32c8669ca
commit a4f1ee35d6
12 changed files with 16 additions and 88 deletions

View File

@@ -27,7 +27,6 @@ class RequestFuncInput:
output_len: int
model: str
model_name: Optional[str] = None
best_of: int = 1
logprobs: Optional[int] = None
extra_body: Optional[dict] = None
multi_modal_content: Optional[dict] = None
@@ -58,7 +57,6 @@ async def async_request_tgi(
async with aiohttp.ClientSession(trust_env=True,
timeout=AIOHTTP_TIMEOUT) as session:
params = {
"best_of": request_func_input.best_of,
"max_new_tokens": request_func_input.output_len,
"do_sample": True,
"temperature": 0.01, # TGI does not accept 0.0 temperature.
@@ -130,7 +128,6 @@ async def async_request_trt_llm(
async with aiohttp.ClientSession(trust_env=True,
timeout=AIOHTTP_TIMEOUT) as session:
assert request_func_input.best_of == 1
payload = {
"accumulate_tokens": True,
"text_input": request_func_input.prompt,
@@ -195,7 +192,6 @@ async def async_request_deepspeed_mii(
) -> RequestFuncOutput:
async with aiohttp.ClientSession(trust_env=True,
timeout=AIOHTTP_TIMEOUT) as session:
assert request_func_input.best_of == 1
payload = {
"prompt": request_func_input.prompt,
@@ -249,7 +245,6 @@ async def async_request_openai_completions(
if request_func_input.model_name else request_func_input.model,
"prompt": request_func_input.prompt,
"temperature": 0.0,
"best_of": request_func_input.best_of,
"max_tokens": request_func_input.output_len,
"logprobs": request_func_input.logprobs,
"stream": True,