[MISC][Bugfix] Use less CPU when message queue has been empty for some time (#16226)
Signed-off-by: Povilas Kanapickas <povilas@radix.lt>
This commit is contained in:
committed by
GitHub
parent
61059bee40
commit
85e2b7bb13
@@ -128,15 +128,21 @@ def test_models(
|
||||
@multi_gpu_test(num_gpus=2)
|
||||
@pytest.mark.parametrize(
|
||||
"model, distributed_executor_backend, attention_backend, "
|
||||
"test_suite", [
|
||||
("distilbert/distilgpt2", "ray", "", "L4"),
|
||||
("distilbert/distilgpt2", "mp", "", "L4"),
|
||||
("meta-llama/Llama-3.2-1B-Instruct", "ray", "", "L4"),
|
||||
("meta-llama/Llama-3.2-1B-Instruct", "mp", "", "L4"),
|
||||
("distilbert/distilgpt2", "ray", "", "A100"),
|
||||
("distilbert/distilgpt2", "mp", "", "A100"),
|
||||
("distilbert/distilgpt2", "mp", "FLASHINFER", "A100"),
|
||||
("meta-llama/Meta-Llama-3-8B", "ray", "FLASHINFER", "A100"),
|
||||
"test_suite, extra_env", [
|
||||
("distilbert/distilgpt2", "ray", "", "L4", {}),
|
||||
("distilbert/distilgpt2", "mp", "", "L4", {}),
|
||||
("distilbert/distilgpt2", "ray", "", "L4", {
|
||||
"VLLM_SLEEP_WHEN_IDLE": "1"
|
||||
}),
|
||||
("distilbert/distilgpt2", "mp", "", "L4", {
|
||||
"VLLM_SLEEP_WHEN_IDLE": "1"
|
||||
}),
|
||||
("meta-llama/Llama-3.2-1B-Instruct", "ray", "", "L4", {}),
|
||||
("meta-llama/Llama-3.2-1B-Instruct", "mp", "", "L4", {}),
|
||||
("distilbert/distilgpt2", "ray", "", "A100", {}),
|
||||
("distilbert/distilgpt2", "mp", "", "A100", {}),
|
||||
("distilbert/distilgpt2", "mp", "FLASHINFER", "A100", {}),
|
||||
("meta-llama/Meta-Llama-3-8B", "ray", "FLASHINFER", "A100", {}),
|
||||
])
|
||||
@pytest.mark.parametrize("enable_prompt_embeds", [True, False])
|
||||
def test_models_distributed(
|
||||
@@ -148,6 +154,7 @@ def test_models_distributed(
|
||||
distributed_executor_backend: str,
|
||||
attention_backend: str,
|
||||
test_suite: str,
|
||||
extra_env: dict[str, str],
|
||||
enable_prompt_embeds: bool,
|
||||
) -> None:
|
||||
|
||||
@@ -173,6 +180,9 @@ def test_models_distributed(
|
||||
attention_backend,
|
||||
)
|
||||
|
||||
for k, v in extra_env.items():
|
||||
monkeypatch_context.setenv(k, v)
|
||||
|
||||
dtype = "half"
|
||||
max_tokens = 5
|
||||
|
||||
|
||||
Reference in New Issue
Block a user