Use NCCL instead of ray for control-plane communication to remove serialization overhead (#2221)
This commit is contained in:
@@ -254,7 +254,7 @@ class GPTBigCodeForCausalLM(nn.Module):
|
||||
self,
|
||||
hidden_states: torch.Tensor,
|
||||
sampling_metadata: SamplingMetadata,
|
||||
) -> SamplerOutput:
|
||||
) -> Optional[SamplerOutput]:
|
||||
next_tokens = self.sampler(self.lm_head_weight, hidden_states,
|
||||
sampling_metadata)
|
||||
return next_tokens
|
||||
|
||||
Reference in New Issue
Block a user