[Bugfix][EP+DP] Use pplx-kernel internode instead of intranode (#19034)
Signed-off-by: Tyler Michael Smith <tysmith@redhat.com> Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
This commit is contained in:
committed by
GitHub
parent
5bc1ad6cee
commit
8a57872b2a
@@ -83,6 +83,10 @@ class PPLXAll2AllManager(All2AllManagerBase):
|
||||
assert has_pplx, "pplx_kernels not found. Please follow https://github.com/vllm-project/vllm/blob/main/tools/ep_kernels/README.md to install pplx_kernels." # noqa
|
||||
super().__init__(cpu_group)
|
||||
|
||||
# TODO(tms): Disable pplx-a2a intranode as it fails with the error:
|
||||
# failed: cuda error /app/pplx/csrc/all_to_all/intranode.cpp:84 'invalid resource handle' # noqa
|
||||
self.internode = True
|
||||
|
||||
if self.internode:
|
||||
# inter-node communication needs nvshmem,
|
||||
# intra-node communication uses p2p mapping directly
|
||||
|
||||
Reference in New Issue
Block a user