[Frontend][torch.compile] CompilationConfig Overhaul (#20283): rename compilation level to compilation mode, deprecate compilation level (#26355)
Signed-off-by: morrison-turnansky <mturnans@redhat.com> Signed-off-by: Morrison Turnansky <mturnans@redhat.com> Co-authored-by: Luka Govedič <ProExpertProg@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
e66d787bce
commit
96b9aa5aa0
@@ -10,7 +10,7 @@ import pytest
|
||||
from tests.utils import wait_for_gpu_memory_to_clear
|
||||
from tests.v1.attention.utils import full_cg_backend_configs as backend_configs
|
||||
from vllm import LLM
|
||||
from vllm.config import CompilationConfig
|
||||
from vllm.config import CompilationConfig, CompilationMode
|
||||
from vllm.platforms import current_platform
|
||||
|
||||
|
||||
@@ -73,7 +73,7 @@ def test_backend_and_cudagraph_mode_combo(backend_name, cudagraph_mode, supporte
|
||||
gpu_memory_utilization=0.45,
|
||||
max_model_len=1024,
|
||||
compilation_config=CompilationConfig(
|
||||
level=3, cudagraph_mode=cudagraph_mode
|
||||
mode=CompilationMode.VLLM_COMPILE, cudagraph_mode=cudagraph_mode
|
||||
),
|
||||
)
|
||||
llm.generate(["Hello, my name is"] * 10)
|
||||
@@ -90,32 +90,27 @@ def test_backend_and_cudagraph_mode_combo(backend_name, cudagraph_mode, supporte
|
||||
)
|
||||
|
||||
|
||||
# test cudagraph_mode with different compilation level.
|
||||
# (backend_name, cudagraph_mode, compilation_level, supported)
|
||||
# test cudagraph_mode with different compilation mode.
|
||||
# (backend_name, cudagraph_mode, compilation_mode, supported)
|
||||
combo_cases_2 = [
|
||||
("FA2", "FULL", 0, True), # no compilation + full cudagraph
|
||||
("FA2", "FULL", 3, True), # piecewise compilation + full cudagraph
|
||||
("FA2", "PIECEWISE", 0, False), # no compilation + piecewise cudagraph
|
||||
("FA2", "PIECEWISE", 3, True), # piecewise compilation + piecewise cudagraph
|
||||
(
|
||||
"FA2",
|
||||
"FULL_AND_PIECEWISE",
|
||||
0,
|
||||
False,
|
||||
), # piecewise cudagraph not supported without piecewise compilation
|
||||
("FA2", "FULL_AND_PIECEWISE", 3, True),
|
||||
("FA2", "FULL_DECODE_ONLY", 0, True),
|
||||
("FA2", "FULL_DECODE_ONLY", 3, True),
|
||||
("FA2", "NONE", 0, True), # no compilation + no cudagraph
|
||||
("FA2", "NONE", 3, True), # piecewise compilation + no cudagraph
|
||||
("FA2", "FULL", CompilationMode.NONE, True),
|
||||
("FA2", "FULL", CompilationMode.VLLM_COMPILE, True),
|
||||
("FA2", "PIECEWISE", CompilationMode.NONE, False),
|
||||
("FA2", "PIECEWISE", CompilationMode.VLLM_COMPILE, True),
|
||||
("FA2", "FULL_AND_PIECEWISE", CompilationMode.NONE, False),
|
||||
("FA2", "FULL_AND_PIECEWISE", CompilationMode.VLLM_COMPILE, True),
|
||||
("FA2", "FULL_DECODE_ONLY", CompilationMode.NONE, True),
|
||||
("FA2", "FULL_DECODE_ONLY", CompilationMode.VLLM_COMPILE, True),
|
||||
("FA2", "NONE", CompilationMode.NONE, True),
|
||||
("FA2", "NONE", CompilationMode.VLLM_COMPILE, True),
|
||||
]
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"backend_name,cudagraph_mode,compilation_level,supported", combo_cases_2
|
||||
"backend_name,cudagraph_mode,compilation_mode,supported", combo_cases_2
|
||||
)
|
||||
def test_cudagraph_compilation_combo(combo_case):
|
||||
backend_name, cudagraph_mode, compilation_level, supported = combo_case
|
||||
backend_name, cudagraph_mode, compilation_mode, supported = combo_case
|
||||
|
||||
env_vars = backend_configs[backend_name].env_vars
|
||||
|
||||
@@ -130,7 +125,7 @@ def test_cudagraph_compilation_combo(combo_case):
|
||||
gpu_memory_utilization=0.45,
|
||||
max_model_len=1024,
|
||||
compilation_config=CompilationConfig(
|
||||
level=compilation_level, cudagraph_mode=cudagraph_mode
|
||||
mode=compilation_mode, cudagraph_mode=cudagraph_mode
|
||||
),
|
||||
)
|
||||
llm.generate(["Hello, my name is"] * 10)
|
||||
|
||||
Reference in New Issue
Block a user