[CI][torch.compile] Reduce e2e fusion test time (#33293)
Signed-off-by: Luka Govedič <lgovedic@redhat.com>
Signed-off-by: ProExpertProg <luka.govedic@gmail.com>
Signed-off-by: Luka Govedič <ProExpertProg@users.noreply.github.com>
This commit is contained in:
@@ -766,7 +766,12 @@ class VllmConfig:
         if self.compilation_config.pass_config.fuse_gemm_comms:
             self.compilation_config.pass_config.enable_sp = True
         if self.compilation_config.pass_config.enable_sp:
-            if "-rms_norm" in self.compilation_config.custom_ops:
+            if self.parallel_config.tensor_parallel_size == 1:
+                logger.warning("Sequence Parallelism requires TP>1, disabling")
+                self.compilation_config.pass_config.enable_sp = False
+                self.compilation_config.pass_config.fuse_gemm_comms = False
+
+            elif "-rms_norm" in self.compilation_config.custom_ops:
                 logger.warning(
                     "RMS norm force disabled, sequence parallelism might break"
                 )
Reference in New Issue
Block a user