From a950f978d3c3aea3eb3649333039b2976bfe2b13 Mon Sep 17 00:00:00 2001 From: biondizzle Date: Fri, 22 May 2026 17:08:12 +0000 Subject: [PATCH] run_test.sh: SIGKILL all children of screen session on cleanup Deadlocked GPU processes ignore SIGHUP from screen -X quit. Now sends SIGKILL to the direct children of the screen session's process (pkill -9 -P), plus a catch-all pkill -9 for any python test_ processes. --- tests/run_test.sh | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/tests/run_test.sh b/tests/run_test.sh index 55d694f7..964f3cee 100755 --- a/tests/run_test.sh +++ b/tests/run_test.sh @@ -12,13 +12,23 @@ if [ -z "$TEST_FILE" ]; then fi # --- CLEANUP --- +# Kill the screen AND every process inside it (handles deadlocked GPU procs) if screen -list | grep -q kernel-test; then - echo "Killing existing kernel-test screen..." + echo "Killing existing kernel-test screen and children..." + # Find PIDs belonging to the screen session and SIGKILL them + session_pid=$(screen -ls | grep kernel-test | grep -o '[0-9]*' | head -1) + if [ -n "$session_pid" ]; then + # Kill the direct children of the screen process (pkill -P matches by parent PID) + pkill -9 -P "$session_pid" 2>/dev/null || true + # Also nuke any python test process just in case + pkill -9 -f "python.*test_" 2>/dev/null || true + fi screen -S kernel-test -X quit 2>/dev/null || true sleep 2 - pkill -f "python.*test_fmha" 2>/dev/null || true - sleep 1 fi +# Belt and suspenders: kill any leftover python test processes +pkill -9 -f "python.*test_" 2>/dev/null || true +sleep 1 rm -f /tmp/kernel-test.log