run_test.sh: SIGKILL all children of screen session on cleanup

Deadlocked GPU processes ignore the SIGHUP sent by `screen -X quit`.
Cleanup now sends SIGKILL to the screen session's child processes
(`pkill -9 -P <session pid>`), plus a catch-all `pkill -9` for any
python test_ processes.
This commit is contained in:
2026-05-22 17:08:12 +00:00
parent ebb5d1ea23
commit a950f978d3

View File

@@ -12,13 +12,23 @@ if [ -z "$TEST_FILE" ]; then
fi
# --- CLEANUP ---
# Tear down any previous kernel-test screen session and the processes running
# inside it. SIGKILL is used on purpose: deadlocked GPU processes are not
# stopped by `screen -X quit` alone.
if screen -list | grep -q kernel-test; then
  echo "Killing existing kernel-test screen and children..."
  # The first run of digits on the matching `screen -ls` line is taken as the
  # session's PID. `[0-9][0-9]*` requires at least one digit, so an empty
  # match can never be selected. `|| true` keeps the lookup best-effort, in
  # line with the kill commands below.
  session_pid=$(screen -ls | grep kernel-test | grep -o '[0-9][0-9]*' | head -1) || true
  if [ -n "$session_pid" ]; then
    # pkill -P selects processes whose parent PID is the screen process, i.e.
    # its direct children only (this is not a process-group kill).
    pkill -9 -P "$session_pid" 2>/dev/null || true
    # Deeper descendants are not matched by -P, so also kill any python test
    # process by command line.
    pkill -9 -f "python.*test_" 2>/dev/null || true
  fi
  # Ask screen itself to exit; errors are ignored because the session may
  # already be gone after the kills above.
  screen -S kernel-test -X quit 2>/dev/null || true
  sleep 2
fi
# Belt and suspenders: kill any leftover python test processes, even when no
# kernel-test screen session was found.
pkill -9 -f "python.*test_" 2>/dev/null || true
sleep 1
# Start the next run with a fresh log file.
rm -f /tmp/kernel-test.log