#!/bin/bash
# Test harness: runs a test in a detached screen session named "kernel-test"
# and logs its combined stdout/stderr to /tmp/kernel-test.log.
#
# Usage:   ./run_test.sh <test_file> [python args...]
# Example: ./run_test.sh tests/unit/test_fmha_v3.py
#
# Before starting, any existing "kernel-test" screen session and any process
# whose command line matches "python.*test_" is SIGKILLed.
#
# Exit status: 0 if the screen session is alive ~2 s after launch, 1 otherwise
# (or if no test file was given).

# NOTE: -u is intentionally not enabled; the sourced venv activate script is
# not visible here and may reference unset variables.
set -e

TEST_FILE=$1
if [ -z "$TEST_FILE" ]; then
  echo "Usage: $0 <test_file>" >&2
  exit 1
fi

# --- CLEANUP ---
# Kill the screen AND the processes it spawned (handles deadlocked GPU procs).
if screen -list | grep -q kernel-test; then
  echo "Killing existing kernel-test screen and children..."
  # The first run of digits on the matching `screen -ls` line is taken as the
  # session's PID.
  session_pid=$(screen -ls | grep kernel-test | grep -o '[0-9]*' | head -1)
  if [ -n "$session_pid" ]; then
    # SIGKILL the direct children of the screen process (-P matches on
    # parent PID). Failures are ignored: there may be nothing left to kill.
    pkill -9 -P "$session_pid" 2>/dev/null || true
    # Also nuke any python test process just in case.
    pkill -9 -f "python.*test_" 2>/dev/null || true
  fi
  screen -S kernel-test -X quit 2>/dev/null || true
  sleep 2
fi

# Belt and suspenders: kill any leftover python test processes, even when no
# screen session was found. `|| true` keeps set -e from aborting on "no match".
pkill -9 -f "python.*test_" 2>/dev/null || true
sleep 1

rm -f /tmp/kernel-test.log

# --- START ---
cd /root/dsv4-nvfp4-workspace/kernel
source /root/dsv4-nvfp4-workspace/venv/bin/activate
export PYTHONPATH=/root/dsv4-nvfp4-workspace/kernel

echo "Running: python -u $*"
echo "Log: /tmp/kernel-test.log"

# Pass the test file (and any extra arguments) as positional parameters to the
# inner shell instead of splicing them into the command string, so paths with
# spaces or shell metacharacters are not re-parsed by `bash -c`.
screen -dmS kernel-test \
  bash -c 'python -u "$@" > /tmp/kernel-test.log 2>&1' _ "$@"

sleep 2
# NOTE(review): a test that finishes within the 2 s wait has already ended its
# screen session, so it also lands in the FAIL branch; check the printed log.
if screen -list | grep -q kernel-test; then
  echo "OK: screen kernel-test is running"
else
  echo "FAIL: screen did not start. Log below:"
  cat /tmp/kernel-test.log 2>/dev/null
  exit 1
fi