Revert "[compile] Initialize passes at VllmBackend init" (#37733)
This commit is contained in:
@@ -32,9 +32,9 @@ from vllm.platforms import current_platform
|
||||
|
||||
def test_compile_config_repr_succeeds():
|
||||
# setup: VllmBackend mutates the config object
|
||||
# Note: VllmBackend.__init__ already calls configure_post_pass()
|
||||
config = VllmConfig()
|
||||
_ = VllmBackend(config)
|
||||
backend = VllmBackend(config)
|
||||
backend.configure_post_pass()
|
||||
|
||||
# test that repr(config) succeeds
|
||||
val = repr(config)
|
||||
|
||||
@@ -836,18 +836,8 @@ class VllmBackend:
|
||||
# in future we need PostGradPassManager.uuid() to be executed
|
||||
# only at compile time.
|
||||
self.inductor_config = deepcopy(self.compilation_config.inductor_compile_config)
|
||||
|
||||
# Configure post-grad passes (including AllReduceFusionPass) during
|
||||
# backend init rather than at torch.compile time, so that expensive
|
||||
# one-time setup (e.g. FlashInfer workspace allocation) is not
|
||||
# attributed to compilation latency.
|
||||
start = time.time()
|
||||
self.configure_post_pass()
|
||||
logger.info_once(
|
||||
"Post-grad pass configuration time: %.2f s",
|
||||
time.time() - start,
|
||||
scope="local",
|
||||
)
|
||||
# `torch.compile` is JIT compiled, so we don't need to
|
||||
# do anything here
|
||||
|
||||
def collect_standalone_compile_artifacts(
|
||||
self,
|
||||
@@ -1128,6 +1118,7 @@ class VllmBackend:
|
||||
assert not self._called, "VllmBackend can only be called once"
|
||||
|
||||
self.graph = graph
|
||||
self.configure_post_pass()
|
||||
|
||||
if self.compilation_config.use_inductor_graph_partition:
|
||||
# Let Inductor decide partitioning; avoid FX-level pre-splitting.
|
||||
|
||||
@@ -380,11 +380,6 @@ def _support_torch_compile(
|
||||
compilation_counter.num_models_seen += 1
|
||||
self.compiled = False
|
||||
|
||||
# Skip if a parent class's @support_torch_compile already
|
||||
# initialized the compile wrapper
|
||||
if hasattr(self, "_compiled_callable"):
|
||||
return
|
||||
|
||||
# Handled by monkeypatching `TorchCompileWithNoGuardsWrapper` into base class
|
||||
TorchCompileWithNoGuardsWrapper.__init__(
|
||||
self,
|
||||
|
||||
Reference in New Issue
Block a user