Add run_test.sh harness (screen + log)

This commit is contained in:
2026-05-22 17:05:43 +00:00
parent d0626e0434
commit af475affab

View File

@@ -1,46 +1,41 @@
#!/bin/bash
# NOTE(review): this text looks like a rendered commit diff in which removed
# and added lines are interleaved without +/- markers, so two generations of
# header comments and two `set` lines appear together — confirm against the
# real run_test.sh before relying on it.
# Setup and run the layer test on B200 — no Docker, no vLLM
# Compiles the kernel raw and runs the comparison test
# Test harness: runs a test in screen, logs to kernel-test.log
# Usage: ./run_test.sh <test_file>
# Example: ./run_test.sh tests/unit/test_fmha_v3.py
# Abort on command failure, on use of an unset variable, and when any stage
# of a pipeline fails.
set -euo pipefail
# Redundant when the line above is in effect: -e is already enabled there.
set -e
# Absolute path of the directory that holds this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Parent directory of SCRIPT_DIR.
REPO_DIR="$(dirname "$SCRIPT_DIR")"
# Virtualenv directory under REPO_DIR/tests.
VENV_DIR="$REPO_DIR/tests/.venv"
# Required first argument: the test file to run. When it is missing or empty
# the shell prints the usage text to stderr and the script exits non-zero.
# The closing brace of the ${1:?...} expansion is required; without it Bash
# keeps scanning for "}" and rejects the script with a syntax error.
TEST_FILE="${1:?Usage: $0 <test_file>}"
# NOTE(review): the lines below appear to mix two versions of the script (a
# venv/pip/build setup flow and a screen-based cleanup/start flow); the first
# `if` has no `fi` of its own in this view — confirm which lines are current.
# Banner showing which repository directory is in use.
echo "=== NVFP4 Layer Test Setup ==="
echo "Repo: $REPO_DIR"
echo ""
# 1. Create venv
# The virtualenv is only created when its directory does not exist yet.
if [ ! -d "$VENV_DIR" ]; then
echo "[1/4] Creating venv..."
python3 -m venv "$VENV_DIR"
else
echo "[1/4] Venv already exists, skipping"
# --- CLEANUP ---
# Kill existing screen and all its processes
# The check is a plain substring match of "kernel-test" in `screen -list`.
if screen -list | grep -q kernel-test; then
echo "Killing existing kernel-test screen..."
# Best effort: error output is discarded and a failure does not abort the run.
screen -S kernel-test -X quit 2>/dev/null || true
sleep 2
# Nuclear option: kill any lingering python test processes
# The pattern is fixed to command lines matching "python.*test_fmha", so it is
# independent of which test file was passed to this script.
pkill -f "python.*test_fmha" 2>/dev/null || true
sleep 1
fi
# Activate the virtualenv located at VENV_DIR in the current shell.
source "$VENV_DIR/bin/activate"
# Remove old log
rm -f /tmp/kernel-test.log
# 2. Install dependencies
echo "[2/4] Installing Python deps..."
pip install --upgrade pip -q
pip install -r "$SCRIPT_DIR/requirements.txt" -q
# --- START ---
# Switch to the hard-coded workspace checkout, activate its virtualenv, and
# put the checkout on PYTHONPATH. A relative test path given on the command
# line is therefore resolved against this directory from here on.
cd /root/dsv4-nvfp4-workspace/kernel
source /root/dsv4-nvfp4-workspace/venv/bin/activate
export PYTHONPATH=/root/dsv4-nvfp4-workspace/kernel
# 3. Build and install the kernel from source
echo "[3/4] Building kernel from source (this takes a few minutes)..."
cd "$REPO_DIR"
# Editable install of the current directory without pip's build isolation.
pip install -e . --no-build-isolation -q
# Launch the test in a detached screen session named "kernel-test", sending
# stdout and stderr of the python process to /tmp/kernel-test.log.
# The test path is handed to the inner shell as a positional argument ($1)
# rather than pasted into the command string, so a path containing spaces or
# shell metacharacters reaches python as one literal argument instead of
# being re-parsed by the inner bash.
echo "Running: python -u $TEST_FILE"
echo "Log: /tmp/kernel-test.log"
screen -dmS kernel-test bash -c 'python -u "$1" > /tmp/kernel-test.log 2>&1' _ "$TEST_FILE"
# Short pause after starting the detached session.
sleep 2
# NOTE(review): the build/layer-test steps and the screen verification below
# look like they come from two different versions of the script shown
# together — confirm which lines are current.
# Build the CUTLASS NVFP4 GEMM C++ extension
echo " Building CUTLASS C++ extension..."
cd "$REPO_DIR/src/nvfp4_megamoe_kernel/cutlass_nvfp4_gemm"
python setup.py install
# 4. Run the test
echo "[4/4] Running layer test..."
# layertest.py is run from the script's own directory.
cd "$SCRIPT_DIR"
python3 layertest.py
echo ""
echo "=== Done ==="
# Verify it started
# Success means a session whose listing contains "kernel-test" is present.
# NOTE(review): presumably a test that finishes before this check also ends
# up in the FAIL branch, since its session would no longer be listed — confirm.
if screen -list | grep -q kernel-test; then
echo "OK: screen 'kernel-test' is running"
else
echo "FAIL: screen did not start. Check /tmp/kernel-test.log"
# Show whatever was logged; the error output of cat is discarded if the log
# cannot be read.
cat /tmp/kernel-test.log 2>/dev/null
exit 1
fi