diff --git a/vllm/nvfp4_cutedsl.py b/vllm/nvfp4_cutedsl.py index 03814ea3..a2e1193d 100644 --- a/vllm/nvfp4_cutedsl.py +++ b/vllm/nvfp4_cutedsl.py @@ -86,12 +86,6 @@ class CuTeDSLMoERunner: self.max_num_tokens, device=self.device, dtype=torch.int32 ).unsqueeze(1).expand(-1, self.top_k).contiguous().view(-1) - # Force GPU sync and verify - torch.cuda.synchronize() - _check = self._token_indices[:8].cpu().tolist() - assert _check == [0, 0, 1, 1, 2, 2, 3, 3], \ - f"Token indices corrupted: {_check}" - self._expert_id_range = torch.arange(self.num_experts, device=self.device) self._expert_offsets_buf = torch.zeros(