[V1] V1 Enablement Oracle (#13726)

Signed-off-by: rshaw@neuralmagic.com <rshaw@neuralmagic.com>
Co-authored-by: rshaw@neuralmagic.com <rshaw@neuralmagic.com>
Co-authored-by: Nicolò Lucchesi <nlucches@redhat.com>
Co-authored-by: Tyler Michael Smith <tyler@neuralmagic.com>
Co-authored-by: Michael Goin <michael@neuralmagic.com>
This commit is contained in:
Robert Shaw
2025-03-15 01:02:20 -04:00
committed by GitHub
parent 8c0d15d5c5
commit d4d93db2c5
96 changed files with 1537 additions and 512 deletions

View File

@@ -24,6 +24,18 @@ logger = init_logger("test_pipeline_parallel")
# True only when the VLLM_MULTI_NODE environment variable is exactly "1";
# an unset variable falls back to "0" and therefore yields False.
VLLM_MULTI_NODE = os.environ.get("VLLM_MULTI_NODE", "0") == "1"
@pytest.fixture(autouse=True, scope="function")
def use_v0_only(monkeypatch):
    """Pin every test in this module to the V0 engine.

    Pipeline parallelism (PP) currently falls back to V0 by default, so
    without this override the TP baseline would run on V1 while the PP
    engine runs on V0, and the two produce divergent results with dummy
    weights. Remove this fixture once V1 is enabled by default for PP.
    """
    # monkeypatch restores the previous value when the fixture is torn down.
    monkeypatch.setenv('VLLM_USE_V1', '0')
class ParallelSetup(NamedTuple):
tp_size: int
pp_size: int