[Bugfix]: Clean up chunked prefill logging when using whisper (#25075)
Signed-off-by: simondanielsson <simon.danielsson99@hotmail.com>
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
import dataclasses
|
||||
from typing import Optional
|
||||
from unittest.mock import Mock
|
||||
|
||||
@@ -1899,4 +1900,53 @@ def test_priority_scheduling_preemption_when_out_of_kv():
|
||||
assert output.scheduled_cached_reqs.num_reqs == 1
|
||||
assert output.scheduled_cached_reqs.req_ids[0] == request_high.request_id
|
||||
assert len(scheduler.waiting) == 1
|
||||
assert len(scheduler.running) == 1
|
||||
assert len(scheduler.running) == 1
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
("enable_chunked_prefill", "is_encoder_decoder", "expect_enabled"),
|
||||
[
|
||||
(True, False, True),
|
||||
(False, False, False),
|
||||
# Encoder-decoder models should always have it disabled
|
||||
(False, True, False),
|
||||
(True, True, False),
|
||||
])
|
||||
def test_chunked_prefill_disabled_for_encoder_decoder(
|
||||
enable_chunked_prefill: bool, is_encoder_decoder: bool,
|
||||
expect_enabled: bool) -> None:
|
||||
"""Validate that chunked prefill is appropriately disabled for
|
||||
encoder-decoder models."""
|
||||
scheduler_config = SchedulerConfig(
|
||||
enable_chunked_prefill=enable_chunked_prefill,
|
||||
is_encoder_decoder=is_encoder_decoder,
|
||||
)
|
||||
|
||||
# `is_encoder_decoder` should only be used during construction
|
||||
# of the config, and otherwise stored in the model config.
|
||||
assert "is_encoder_decoder" not in vars(scheduler_config)
|
||||
assert "is_encoder_decoder" not in [
|
||||
f.name for f in dataclasses.fields(scheduler_config)
|
||||
]
|
||||
_validate_chunked_prefill_settings_for_encoder_decoder(
|
||||
scheduler_config, is_encoder_decoder, expect_enabled)
|
||||
|
||||
# Ensure it is retained in VllmConfig, even after its post-init.
|
||||
vllm_config = VllmConfig(scheduler_config=scheduler_config)
|
||||
_validate_chunked_prefill_settings_for_encoder_decoder(
|
||||
vllm_config.scheduler_config, is_encoder_decoder, expect_enabled)
|
||||
|
||||
|
||||
def _validate_chunked_prefill_settings_for_encoder_decoder(
|
||||
scheduler_config: SchedulerConfig, is_encoder_decoder: bool,
|
||||
expect_enabled: bool) -> None:
|
||||
"""Validate chunked prefill settings in the scheduler config for
|
||||
encoder-decoder models."""
|
||||
assert scheduler_config.chunked_prefill_enabled is expect_enabled
|
||||
assert scheduler_config.enable_chunked_prefill is expect_enabled
|
||||
if is_encoder_decoder:
|
||||
# Encoder-decoder models should automatically disable chunked multimodal
|
||||
# inputs as well
|
||||
assert scheduler_config.disable_chunked_mm_input is not expect_enabled
|
||||
if is_encoder_decoder and not expect_enabled:
|
||||
assert scheduler_config.long_prefill_token_threshold == 0
|
||||
|
||||
Reference in New Issue
Block a user