[torch.compile] Add encoder tag for compilation (#30489)
Signed-off-by: ilmarkov <markovilya197@gmail.com>
This commit is contained in:
@@ -463,21 +463,27 @@ class PiecewiseCompileInterpreter(torch.fx.Interpreter):
|
||||
# The tag for the part of the model being compiled,
# e.g. backbone/eagle_head.
model_tag: str = "backbone"
# Encoder flag that travels together with model_tag; set_model_tag()
# installs and restores both values as a pair.
model_is_encoder: bool = False


@contextmanager
def set_model_tag(tag: str, is_encoder: bool = False):
    """Temporarily set the module-level model tag and encoder flag.

    Args:
        tag: Value installed as ``model_tag`` for the duration of the
            ``with`` block. Must differ from the tag currently in effect.
        is_encoder: Value installed as ``model_is_encoder`` for the
            duration of the ``with`` block. Defaults to ``False``.

    Raises:
        AssertionError: If ``tag`` equals the current ``model_tag``.

    Both globals are restored to their previous values when the block
    exits, including when the body raises.
    """
    global model_tag
    global model_is_encoder
    # Re-entering with the tag that is already active is rejected.
    assert tag != model_tag, (
        f"Model tag {tag} is the same as the current tag {model_tag}."
    )
    # Snapshot both values so nested uses unwind to the enclosing state.
    old_tag = model_tag
    old_is_encoder = model_is_encoder

    model_tag = tag
    model_is_encoder = is_encoder
    try:
        yield
    finally:
        model_tag = old_tag
        model_is_encoder = old_is_encoder
|
||||
|
||||
|
||||
class VllmBackend:
|
||||
@@ -523,6 +529,9 @@ class VllmBackend:
|
||||
# them, e.g. backbone (default), eagle_head, etc.
|
||||
self.prefix = prefix or model_tag
|
||||
|
||||
# Mark compilation for encoder.
|
||||
self.is_encoder = model_is_encoder
|
||||
|
||||
# Passes to run on the graph post-grad.
|
||||
self.pass_manager = resolve_obj_by_qualname(
|
||||
current_platform.get_pass_manager_cls()
|
||||
|
||||
@@ -53,12 +53,7 @@ class PiecewiseBackend:
|
||||
self.is_last_graph = piecewise_compile_index == total_piecewise_compiles - 1
|
||||
|
||||
self.is_full_graph = total_piecewise_compiles == 1
|
||||
# TODO: we need to generalize encoder compilation to other models
|
||||
self.is_encoder_compilation = vllm_backend.prefix in [
|
||||
"Qwen2_5_VisionPatchEmbed",
|
||||
"Qwen2_5_VisionPatchMerger",
|
||||
"Qwen2_5_VisionBlock",
|
||||
]
|
||||
self.is_encoder_compilation = vllm_backend.is_encoder
|
||||
|
||||
self.compile_ranges = self.compilation_config.get_compile_ranges()
|
||||
if self.is_encoder_compilation:
|
||||
|
||||
Reference in New Issue
Block a user