[EPLB][Cleanup] Remove is_async_enabled from EplbModelState (#32050)

Signed-off-by: Sage Moore <sage@neuralmagic.com>
This commit is contained in:
Sage Moore
2026-01-13 10:19:03 -08:00
committed by GitHub
parent ab74b2a27a
commit 6beef12b9b
2 changed files with 8 additions and 20 deletions

View File

@@ -30,6 +30,7 @@ def start_async_worker(
ep_group = get_ep_group().device_group
rank = ep_group.rank()
device_index = state.cuda_device_index
assert state.is_async
def thread_target() -> None:
assert device_index is not None
@@ -42,9 +43,9 @@ def start_async_worker(
transfer_run_periodically(
state=state,
ep_group=ep_group,
cuda_stream=cuda_stream,
is_profile=is_profile,
rank_mapping=rank_mapping,
cuda_stream=cuda_stream,
)
)
except Exception as exc: # pragma: no cover - diagnostic path
@@ -60,17 +61,16 @@ def start_async_worker(
async def transfer_run_periodically(
state: "EplbState",
ep_group: ProcessGroup,
cuda_stream: torch.cuda.Stream,
is_profile: bool = False,
rank_mapping: dict[int, int] | None = None,
cuda_stream: torch.cuda.Stream = None,
) -> None:
while True:
await asyncio.to_thread(state.rearrange_event.wait)
logger.info("async worker woke up for EPLB transfer")
assert state.is_async
for model_state in state.model_states.values():
if not model_state.is_async_enabled:
continue
current_num_layers = model_state.model.num_moe_layers
while (
model_state.rebalanced

View File

@@ -182,10 +182,6 @@ class EplbModelState:
"""
intermediate variable between `move_to_buffer` and `move_to_workspace`.
"""
is_async_enabled: bool
"""
The flag indicates whether the EPLB is running in async mode.
"""
cuda_device_index: int | None
"""
CUDA device index for the async EPLB worker thread.
@@ -518,7 +514,6 @@ class EplbState:
recv_expert_ids=np.array([]),
recv_dst_rows=np.array([]),
),
is_async_enabled=self.is_async,
cuda_device_index=self.cuda_device_index,
new_physical_to_logical_map=new_physical_to_logical_map,
new_logical_to_physical_map=new_logical_to_physical_map,
@@ -630,19 +625,12 @@ class EplbState:
if self.is_async:
for eplb_model_state in self.model_states.values():
if not eplb_model_state.is_async_enabled:
continue
all_ranks_buffer_ready = False
if eplb_model_state.pending_global_ready_check:
all_ranks_buffer_ready = self._all_ranks_buffer_ready(
eplb_model_state
)
if (
eplb_model_state.is_async_enabled
and eplb_model_state.ep_buffer_ready
and all_ranks_buffer_ready
):
if eplb_model_state.ep_buffer_ready and all_ranks_buffer_ready:
self.move_to_workspace(
model_state=eplb_model_state,
ep_group=ep_group,
@@ -664,8 +652,8 @@ class EplbState:
)
if self.expert_rearrangement_step >= self.expert_rearrangement_step_interval:
if any(
eplb_model_state.is_async_enabled and eplb_model_state.rebalanced
if self.is_async and any(
eplb_model_state.rebalanced
for eplb_model_state in self.model_states.values()
):
# Still performing asynchronous rearrangement
@@ -822,7 +810,7 @@ class EplbState:
eplb_model_state.physical_to_logical_map,
)
if not eplb_model_state.is_async_enabled or is_profile:
if not self.is_async or is_profile:
# Update expert weights
rearrange_expert_weights_inplace(
eplb_model_state.physical_to_logical_map,