[ci][distributed] try to fix pp test (#7054)
This commit is contained in:
@@ -1,4 +1,6 @@
|
||||
import functools
|
||||
import os
|
||||
import signal
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
@@ -336,3 +338,40 @@ def wait_for_gpu_memory_to_clear(devices: List[int],
|
||||
f'{dur_s=:.02f} ({threshold_bytes/2**30=})')
|
||||
|
||||
time.sleep(5)
|
||||
|
||||
|
||||
def fork_new_process_for_each_test(f):
    """Decorate a test function so each call runs in a freshly forked child.

    The decorated function forks, runs ``f(*args, **kwargs)`` in the child and
    waits for it in the parent. The child always terminates via ``os._exit``:
    with status 0 when ``f`` returns normally or raises pytest's ``Skipped``,
    and with status 1 when ``f`` raises anything else. After the child has
    been reaped, the parent sends SIGTERM to its whole process group to clean
    up any processes the test left behind.

    Args:
        f: The test function to run in a child process.

    Returns:
        A wrapper with the same call signature as ``f``. It returns ``None``;
        the return value of ``f`` is discarded because it is produced in the
        child process.

    Raises:
        AssertionError: Raised in the parent when the child's wait status is
            non-zero (the child failed or was terminated by a signal).
    """

    @functools.wraps(f)
    def wrapper(*args, **kwargs):
        # Make the process the leader of its own process group
        # to avoid sending SIGTERM to the parent process.
        # Skip the call when this process already leads its group: it would
        # be a no-op, and setpgid() is rejected for session leaders.
        if os.getpgrp() != os.getpid():
            os.setpgrp()
        from _pytest.outcomes import Skipped
        pid = os.fork()
        if pid == 0:
            # Child process: run the test and never return to the caller,
            # because the caller here is a forked copy of the test runner.
            try:
                f(*args, **kwargs)
            except Skipped as e:
                # convert Skipped to exit code 0
                print(str(e))
                os._exit(0)
            except BaseException:
                # Catch BaseException rather than Exception: anything that
                # escaped (KeyboardInterrupt, SystemExit, ...) would unwind
                # into the forked copy of the runner instead of ending the
                # child.
                import traceback
                traceback.print_exc()
                os._exit(1)
            else:
                os._exit(0)
        else:
            # Parent process: the child inherited our process group, so this
            # is the group that must be cleaned up afterwards.
            pgid = os.getpgid(pid)
            # waitpid() yields the raw wait status; it is 0 only when the
            # child exited normally with exit code 0.
            _pid, wait_status = os.waitpid(pid, 0)
            # ignore SIGTERM signal itself
            old_signal_handler = signal.signal(signal.SIGTERM, signal.SIG_IGN)
            try:
                # kill all child processes
                os.killpg(pgid, signal.SIGTERM)
            finally:
                # restore the signal handler even if killpg() failed, so the
                # test runner does not keep ignoring SIGTERM
                signal.signal(signal.SIGTERM, old_signal_handler)
            assert wait_status == 0, (f"function {f} failed when called with"
                                      f" args {args} and kwargs {kwargs}")

    return wrapper
|
||||
|
||||
Reference in New Issue
Block a user