run_test.sh: SIGKILL all children of screen session on cleanup
Deadlocked GPU processes ignore the SIGHUP they receive on screen -X quit. The script now sends SIGKILL to the direct children of the screen session process (pkill -9 -P), plus a catch-all pkill -9 for any python test_ processes.
This commit is contained in:
@@ -12,13 +12,23 @@ if [ -z "$TEST_FILE" ]; then
|
||||
fi
|
||||
|
||||
# --- CLEANUP ---
|
||||
# Kill the screen AND every process inside it (handles deadlocked GPU procs)
|
||||
if screen -list | grep -q kernel-test; then
|
||||
echo "Killing existing kernel-test screen..."
|
||||
echo "Killing existing kernel-test screen and children..."
|
||||
# Find PIDs belonging to the screen session and SIGKILL them
|
||||
session_pid=$(screen -ls | grep kernel-test | grep -o '[0-9]*' | head -1)
|
||||
if [ -n "$session_pid" ]; then
|
||||
# SIGKILL the direct children of the screen process (pkill -P matches by parent PID, not by process group)
|
||||
pkill -9 -P "$session_pid" 2>/dev/null || true
|
||||
# Also nuke any python test process just in case
|
||||
pkill -9 -f "python.*test_" 2>/dev/null || true
|
||||
fi
|
||||
screen -S kernel-test -X quit 2>/dev/null || true
|
||||
sleep 2
|
||||
pkill -f "python.*test_fmha" 2>/dev/null || true
|
||||
sleep 1
|
||||
fi
|
||||
# Belt and suspenders: kill any leftover python test processes
|
||||
pkill -9 -f "python.*test_" 2>/dev/null || true
|
||||
sleep 1
|
||||
|
||||
rm -f /tmp/kernel-test.log
|
||||
|
||||
|
||||
Reference in New Issue
Block a user