Use NCCL instead of ray for control-plane communication to remove serialization overhead (#2221)

This commit is contained in:
Zhuohan Li
2024-01-04 03:30:22 +08:00
committed by GitHub
parent 1066cbd152
commit fd4ea8ef5c
34 changed files with 524 additions and 262 deletions

View File

@@ -254,7 +254,7 @@ class GPTBigCodeForCausalLM(nn.Module):
self,
hidden_states: torch.Tensor,
sampling_metadata: SamplingMetadata,
-) -> SamplerOutput:
+) -> Optional[SamplerOutput]:
next_tokens = self.sampler(self.lm_head_weight, hidden_states,
sampling_metadata)
return next_tokens