[CI] Prune Quantization Tests and skip compilation (#27038)

Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
Michael Goin
2025-10-16 17:26:35 -04:00
committed by GitHub
parent b3dda72c23
commit 01c977e96d
9 changed files with 62 additions and 134 deletions

View File

@@ -40,7 +40,9 @@ def test_gptq_with_dynamic(
GPTQMarlinLinearMethod if use_marlin_kernel else (GPTQLinearMethod)
)
-with vllm_runner(model_id, dtype=torch.float16, max_model_len=2048) as llm:
+with vllm_runner(
+model_id, dtype=torch.float16, max_model_len=2048, enforce_eager=True
+) as llm:
def check_model(model):
for name, submodule in model.named_modules():