[V0 Deprecation] Enable the remaining multimodal tests in V1 (#25307)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2025-09-21 01:50:58 +08:00
committed by GitHub
parent d88918e4c2
commit bef180f009
8 changed files with 195 additions and 214 deletions

View File

@@ -45,12 +45,15 @@ def run_awq_test(
# will hurt multiprocessing backend with fork method (the default method).
# max_model_len should be greater than image_feature_size
with vllm_runner(source_model,
max_model_len=4096,
dtype=dtype,
tensor_parallel_size=tensor_parallel_size,
distributed_executor_backend=distributed_executor_backend,
enforce_eager=True) as vllm_model:
with vllm_runner(
source_model,
max_model_len=4096,
dtype=dtype,
tensor_parallel_size=tensor_parallel_size,
distributed_executor_backend=distributed_executor_backend,
enforce_eager=True,
default_torch_num_threads=1,
) as vllm_model:
source_outputs_per_image = [
vllm_model.generate_greedy_logprobs(prompts,
max_tokens,
@@ -59,13 +62,16 @@ def run_awq_test(
for prompts, images in inputs_per_image
]
with vllm_runner(quant_model,
quantization="awq",
max_model_len=4096,
dtype=dtype,
tensor_parallel_size=tensor_parallel_size,
distributed_executor_backend=distributed_executor_backend,
enforce_eager=True) as vllm_model:
with vllm_runner(
quant_model,
quantization="awq",
max_model_len=4096,
dtype=dtype,
tensor_parallel_size=tensor_parallel_size,
distributed_executor_backend=distributed_executor_backend,
enforce_eager=True,
default_torch_num_threads=1,
) as vllm_model:
quant_outputs_per_image = [
vllm_model.generate_greedy_logprobs(prompts,
max_tokens,
@@ -108,12 +114,8 @@ def run_awq_test(
@pytest.mark.parametrize("num_logprobs", [5])
@torch.inference_mode()
def test_awq_models(vllm_runner, image_assets, source_model, quant_model,
size_factors, dtype, max_tokens, num_logprobs,
monkeypatch) -> None:
size_factors, dtype, max_tokens, num_logprobs) -> None:
# Test V1: this test hangs during setup on single-scale input.
# TODO: figure out why and re-enable this on V1.
monkeypatch.setenv("VLLM_USE_V1", "0")
run_awq_test(
vllm_runner,
image_assets,