[V0 Deprecation] Remove LLMEngine (#25033)

Signed-off-by: Woosuk Kwon <woosuk@thinkingmachines.ai>
Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
This commit is contained in:
Woosuk Kwon
2025-09-20 17:56:30 -07:00
committed by GitHub
parent 367a480bd3
commit 52c2a8d4ad
29 changed files with 65 additions and 2763 deletions

View File

@@ -122,11 +122,12 @@ def test_cumem_with_cudagraph():
# sleep mode with safetensors
("meta-llama/Llama-3.2-1B", True),
# sleep mode with pytorch checkpoint
("facebook/opt-125m", False),
("facebook/opt-125m", True),
])
def test_end_to_end(monkeypatch: pytest.MonkeyPatch, model: str, use_v1: bool):
with monkeypatch.context() as m:
m.setenv("VLLM_USE_V1", "1" if use_v1 else "0")
assert use_v1
m.setenv("VLLM_USE_V1", "1")
free, total = torch.cuda.mem_get_info()
used_bytes_baseline = total - free # in case other process is running
llm = LLM(model, enable_sleep_mode=True)