[CI] Prune Quantization Tests and skip compilation (#27038)
Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
@@ -31,7 +31,9 @@ def test_lm_head(
|
||||
) -> None:
|
||||
# `LLM.apply_model` requires pickling a function.
|
||||
monkeypatch.setenv("VLLM_ALLOW_INSECURE_SERIALIZATION", "1")
|
||||
with vllm_runner(model_id, dtype=torch.float16, max_model_len=2048) as vllm_model:
|
||||
with vllm_runner(
|
||||
model_id, dtype=torch.float16, max_model_len=2048, enforce_eager=True
|
||||
) as vllm_model:
|
||||
|
||||
def check_model(model):
|
||||
lm_head_layer = model.lm_head
|
||||
|
||||
Reference in New Issue
Block a user