diff --git a/tests/run_test.sh b/tests/run_test.sh index 55d694f7..964f3cee 100755 --- a/tests/run_test.sh +++ b/tests/run_test.sh @@ -12,13 +12,23 @@ if [ -z "$TEST_FILE" ]; then fi # --- CLEANUP --- +# Kill the screen, its direct children, and any matching python test processes (handles deadlocked GPU procs) if screen -list | grep -q kernel-test; then - echo "Killing existing kernel-test screen..." + echo "Killing existing kernel-test screen and children..." + # Take the first run of digits on the kernel-test line of the screen listing as the session PID + session_pid=$(screen -ls | grep kernel-test | grep -o '[0-9]*' | head -1) + if [ -n "$session_pid" ]; then + # SIGKILL the session's direct children only (matched by parent PID, not by process group) + pkill -9 -P "$session_pid" 2>/dev/null || true + # Also SIGKILL every process whose full command line matches python.*test_ + pkill -9 -f "python.*test_" 2>/dev/null || true + fi screen -S kernel-test -X quit 2>/dev/null || true sleep 2 - pkill -f "python.*test_fmha" 2>/dev/null || true - sleep 1 fi +# Belt and suspenders: repeat the python test sweep even when no screen session was found +pkill -9 -f "python.*test_" 2>/dev/null || true +sleep 1 rm -f /tmp/kernel-test.log