Files
nvfp4-megamoe-kernel/tests/run_test.sh
biondizzle a950f978d3 run_test.sh: SIGKILL all children of screen session on cleanup
Deadlocked GPU processes ignore SIGHUP from screen -X quit.
Now kills the entire process group with SIGKILL, plus a catch-all
pkill for any python test_ processes.
2026-05-22 17:08:12 +00:00

52 lines
1.6 KiB
Bash
Executable File

#!/bin/bash
# run_test.sh -- launch a single test script inside a detached screen session.
#
# The test runs in a screen session named "kernel-test" and its combined
# stdout/stderr is written to /tmp/kernel-test.log. Any previous kernel-test
# session is torn down before the new one is started.
#
# Usage:   ./run_test.sh <test_file>
# Example: ./run_test.sh tests/unit/test_fmha_v3.py
#
# <test_file> is handed to `python -u` after the script has changed into
# /root/dsv4-nvfp4-workspace/kernel, so relative paths resolve from there.

# Abort on the first failing command that is not part of a conditional.
set -e

# First positional argument: the test script to run (empty when omitted).
TEST_FILE=${1:-}

# Guard clause: without a test file there is nothing to launch.
[[ -n "$TEST_FILE" ]] || {
  echo "Usage: $0 <test_file>"
  exit 1
}
# --- CLEANUP ---
# Tear down every previous kernel-test screen session together with all the
# processes running inside it. SIGKILL is used on purpose: deadlocked GPU
# processes ignore the SIGHUP that `screen -X quit` delivers.

#######################################
# SIGKILL every descendant (children, grandchildren, ...) of a process.
# The process itself is left alone.
# Arguments: $1 - PID whose descendants should be killed
# Returns:   0 always; processes that are already gone are ignored
#######################################
kill_descendants() {
  local parent_pid=$1
  local child_pid
  while IFS= read -r child_pid; do
    # Descend first: once a process is killed its children are re-parented
    # and can no longer be found through `pgrep -P`.
    kill_descendants "$child_pid"
    kill -9 "$child_pid" 2>/dev/null || true
  done < <(pgrep -P "$parent_pid" 2>/dev/null)
}

# `screen -ls` prints one "<pid>.<name>" id per session in the first column.
# Match the name exactly so unrelated sessions that merely contain
# "kernel-test" are not touched, and collect all matches in case an earlier
# run left more than one session behind.
mapfile -t stale_sessions < <(
  screen -ls | awk '$1 ~ /^[0-9]+\.kernel-test$/ { print $1 }'
)

if (( ${#stale_sessions[@]} > 0 )); then
  echo "Killing existing kernel-test screen and children..."
  for session in "${stale_sessions[@]}"; do
    # The part before the first dot is the PID of the screen process.
    kill_descendants "${session%%.*}"
    # Address the session by its full id so the command is unambiguous even
    # when several sessions share the name. The session may already have
    # exited once its last window died, hence the `|| true`.
    screen -S "$session" -X quit 2>/dev/null || true
  done
  # Also nuke any python test process just in case
  pkill -9 -f "python.*test_" 2>/dev/null || true
  sleep 2
fi

# Belt and suspenders: kill any leftover python test processes.
# WARNING: -f matches against the full command line of every process visible
# to this user, so this also kills python test runs that were not started by
# this script.
pkill -9 -f "python.*test_" 2>/dev/null || true
sleep 1

# Start the next run with an empty log.
rm -f /tmp/kernel-test.log
# --- START ---
# Run from the kernel directory with the workspace's venv activate script
# sourced and the kernel directory on PYTHONPATH. With `set -e` in effect a
# failing `cd` or `source` aborts the script here.
cd /root/dsv4-nvfp4-workspace/kernel
source /root/dsv4-nvfp4-workspace/venv/bin/activate
export PYTHONPATH=/root/dsv4-nvfp4-workspace/kernel

echo "Running: python -u $TEST_FILE"
echo "Log: /tmp/kernel-test.log"

# Start the test detached (-dm) in a session named kernel-test (-S).
# The test path is passed as a positional parameter and expanded as "$1"
# inside the inner shell instead of being spliced into the command string,
# so paths containing spaces or shell metacharacters are passed to python
# verbatim and are never interpreted as shell code. The word after the
# command string ("kernel-test") only sets $0 of the inner shell.
screen -dmS kernel-test \
  bash -c 'python -u "$1" > /tmp/kernel-test.log 2>&1' kernel-test "$TEST_FILE"

# Give screen a moment to come up, then confirm the session is listed.
sleep 2
if screen -list | grep -q kernel-test; then
  echo "OK: screen kernel-test is running"
else
  # NOTE: this branch is also reached when the test itself finished (or
  # crashed) within the 2 second wait, because the session ends as soon as
  # its only command exits. The log shown below tells the two cases apart.
  echo "FAIL: screen did not start. Log below:"
  # The log may not exist if nothing was ever started; do not let `set -e`
  # cut the script short before the explicit exit status below.
  cat /tmp/kernel-test.log 2>/dev/null || true
  exit 1
fi