Add pipeline parallel support to TransformersModel (#12832)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Signed-off-by: Isotr0py <2037008807@qq.com>
Co-authored-by: Isotr0py <2037008807@qq.com>
This commit is contained in:
Harry Mellor
2025-03-25 02:41:45 +00:00
committed by GitHub
parent 911c8eb000
commit 97cfa65df7
4 changed files with 244 additions and 87 deletions

View File

@@ -472,6 +472,16 @@ class PPMissingLayer(torch.nn.Identity):
def __init__(self, *args, **kwargs):
    """Initialize the placeholder layer.

    Accepts (and forwards nothing from) arbitrary args so it can stand
    in for any missing layer; only the ``return_tuple`` keyword is read.
    """
    # Whether forward() should wrap its passthrough value in a 1-tuple.
    want_tuple = kwargs.get("return_tuple", False)
    super().__init__()
    self.return_tuple = want_tuple
def forward(self, *args, **kwargs):
    """Pass the first input through unchanged.

    Returns the first positional argument, or — when no positional
    arguments are given — the first keyword-argument value. The result
    is wrapped in a 1-tuple when ``self.return_tuple`` is set, so
    callers that expect a tuple of outputs keep working.

    Raises:
        StopIteration: if called with no arguments at all (empty
            ``args`` and ``kwargs``) — presumably never happens in
            practice; confirm against callers.
    """
    # Renamed from `input`, which shadowed the builtin of the same name.
    value = args[0] if args else next(iter(kwargs.values()))
    return (value,) if self.return_tuple else value
_CPU_OFFLOAD_BYTES = 0
@@ -650,4 +660,4 @@ def cast_overflow_tensors(
if tensors.isinf().any() or tensors.isnan().any():
clamp_value = torch.finfo(tensors.dtype).max - offset
tensors = torch.clamp(tensors, min=-clamp_value, max=clamp_value)
return tensors
return tensors