[XPU] Fix MOE DP accuracy issue on XPU (#25465)

This commit is contained in:
Fanli Lin
2025-09-23 22:32:57 +08:00
committed by GitHub
parent da5e7e4329
commit 4c966e440e
2 changed files with 29 additions and 1 deletions

View File

@@ -101,6 +101,13 @@ def parse_args():
"--quantization",
type=str,
)
parser.add_argument(
"--disable-expert-parallel",
dest="enable_expert_parallel",
action="store_false",
help="Disable expert parallel (default: enabled).",
)
parser.set_defaults(enable_expert_parallel=True)
return parser.parse_args()
@@ -113,6 +120,7 @@ def main(
dp_master_port,
GPUs_per_dp_rank,
enforce_eager,
enable_expert_parallel,
trust_remote_code,
max_num_seqs,
max_model_len,
@@ -168,7 +176,7 @@ def main(
model=model,
tensor_parallel_size=GPUs_per_dp_rank,
enforce_eager=enforce_eager,
-enable_expert_parallel=True,
+enable_expert_parallel=enable_expert_parallel,
trust_remote_code=trust_remote_code,
max_num_seqs=max_num_seqs,
max_model_len=max_model_len,
@@ -229,6 +237,7 @@ if __name__ == "__main__":
dp_master_port,
tp_size,
args.enforce_eager,
args.enable_expert_parallel,
args.trust_remote_code,
args.max_num_seqs,
args.max_model_len,