[misc] hide best_of from engine (#9261)
Co-authored-by: Brendan Wong <bjwpokemon@gmail.com>
This commit is contained in:
@@ -767,7 +767,7 @@ class LLMEngine:
|
||||
Details:
|
||||
- Set arrival_time to the current time if it is None.
|
||||
- Set prompt_token_ids to the encoded prompt if it is None.
|
||||
- - Create `best_of` number of :class:`~vllm.Sequence` objects.
|
||||
+ - Create `n` number of :class:`~vllm.Sequence` objects.
|
||||
- Create a :class:`~vllm.SequenceGroup` object
|
||||
from the list of :class:`~vllm.Sequence`.
|
||||
- Add the :class:`~vllm.SequenceGroup` object to the scheduler.
|
||||
@@ -1242,8 +1242,7 @@ class LLMEngine:
|
||||
if seq_group_metadata.do_sample:
|
||||
assert len(sequence_group_outputs.samples) == 1, (
|
||||
"Async output processor expects a single sample"
|
||||
- " (i.e sampling_params.n == 1 and no "
|
||||
- "sampling_params.best_of > 1)")
|
||||
+ " (i.e sampling_params.n == 1)")
|
||||
sample = sequence_group_outputs.samples[0]
|
||||
|
||||
assert len(seq_group.seqs) == 1
|
||||
@@ -1612,7 +1611,6 @@ class LLMEngine:
|
||||
# Metadata
|
||||
num_prompt_tokens_requests: List[int] = []
|
||||
num_generation_tokens_requests: List[int] = []
|
||||
- best_of_requests: List[int] = []
|
||||
n_requests: List[int] = []
|
||||
finished_reason_requests: List[str] = []
|
||||
|
||||
@@ -1683,8 +1681,6 @@ class LLMEngine:
|
||||
for seq in seq_group.get_finished_seqs()
|
||||
])
|
||||
if seq_group.sampling_params is not None:
|
||||
- best_of_requests.append(
|
||||
- seq_group.sampling_params.best_of)
|
||||
n_requests.append(seq_group.sampling_params.n)
|
||||
finished_reason_requests.extend([
|
||||
SequenceStatus.get_finished_reason(seq.status)
|
||||
@@ -1737,7 +1733,6 @@ class LLMEngine:
|
||||
# Metadata
|
||||
num_prompt_tokens_requests=num_prompt_tokens_requests,
|
||||
num_generation_tokens_requests=num_generation_tokens_requests,
|
||||
- best_of_requests=best_of_requests,
|
||||
n_requests=n_requests,
|
||||
finished_reason_requests=finished_reason_requests,
|
||||
)
|
||||
@@ -1824,8 +1819,6 @@ class LLMEngine:
|
||||
seq_group.sampling_params.top_p)
|
||||
seq_span.set_attribute(SpanAttributes.LLM_REQUEST_MAX_TOKENS,
|
||||
seq_group.sampling_params.max_tokens)
|
||||
- seq_span.set_attribute(SpanAttributes.LLM_REQUEST_BEST_OF,
|
||||
- seq_group.sampling_params.best_of)
|
||||
seq_span.set_attribute(SpanAttributes.LLM_REQUEST_N,
|
||||
seq_group.sampling_params.n)
|
||||
seq_span.set_attribute(SpanAttributes.LLM_USAGE_NUM_SEQUENCES,
|
||||
|
||||
Reference in New Issue
Block a user