Add run_test.sh harness (screen + log)
This commit is contained in:
@@ -1,46 +1,41 @@
|
||||
#!/bin/bash
|
||||
# Setup and run the layer test on B200 — no Docker, no vLLM
|
||||
# Compiles the kernel raw and runs the comparison test
|
||||
# Test harness: runs a test in screen, logs to kernel-test.log
|
||||
# Usage: ./run_test.sh <test_file>
|
||||
# Example: ./run_test.sh tests/unit/test_fmha_v3.py
|
||||
|
||||
set -euo pipefail
|
||||
set -e
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
REPO_DIR="$(dirname "$SCRIPT_DIR")"
|
||||
VENV_DIR="$REPO_DIR/tests/.venv"
|
||||
# Required first argument: path of the test file to run. `${1:?msg}` aborts the
# script with the usage message when the argument is missing or empty.
TEST_FILE="${1:?Usage: $0 <test_file>}"
|
||||
|
||||
echo "=== NVFP4 Layer Test Setup ==="
|
||||
echo "Repo: $REPO_DIR"
|
||||
echo ""
|
||||
|
||||
# 1. Create venv
|
||||
if [ ! -d "$VENV_DIR" ]; then
|
||||
echo "[1/4] Creating venv..."
|
||||
python3 -m venv "$VENV_DIR"
|
||||
else
|
||||
echo "[1/4] Venv already exists, skipping"
|
||||
# --- CLEANUP ---
|
||||
# Kill existing screen and all its processes
|
||||
# Detect a leftover 'kernel-test' screen session and tear it down.
# With `set -o pipefail` the exit status of `screen -list` itself would take
# part in the condition (it is not a reliable "session exists" signal, and
# `grep -q` may close the pipe early), so its status is neutralised here and
# grep's match alone decides the branch.
if { screen -list || true; } | grep -q kernel-test; then
  echo "Killing existing kernel-test screen..."
  # Best effort: the session may already be gone by the time we ask it to quit.
  screen -S kernel-test -X quit 2>/dev/null || true
  sleep 2
  # Nuclear option: kill any lingering python test processes
  # (pkill exits non-zero when nothing matched, which is fine here).
  pkill -f "python.*test_fmha" 2>/dev/null || true
  sleep 1
fi
|
||||
|
||||
source "$VENV_DIR/bin/activate"
|
||||
# Remove old log
|
||||
rm -f /tmp/kernel-test.log
|
||||
|
||||
# 2. Install dependencies
|
||||
echo "[2/4] Installing Python deps..."
|
||||
pip install --upgrade pip -q
|
||||
pip install -r "$SCRIPT_DIR/requirements.txt" -q
|
||||
# --- START ---
|
||||
cd /root/dsv4-nvfp4-workspace/kernel
|
||||
source /root/dsv4-nvfp4-workspace/venv/bin/activate
|
||||
export PYTHONPATH=/root/dsv4-nvfp4-workspace/kernel
|
||||
|
||||
# 3. Build and install the kernel from source
|
||||
echo "[3/4] Building kernel from source (this takes a few minutes)..."
|
||||
cd "$REPO_DIR"
|
||||
pip install -e . --no-build-isolation -q
|
||||
echo "Running: python -u $TEST_FILE"
|
||||
echo "Log: /tmp/kernel-test.log"
|
||||
# Launch the test detached in a screen session named 'kernel-test', with stdout
# and stderr captured in the log file. The test path is handed to the inner
# shell as a positional argument ("$1") instead of being spliced into the
# command string, so paths containing spaces or shell metacharacters are passed
# to python verbatim and never re-parsed as shell code.
screen -dmS kernel-test bash -c 'python -u "$1" > /tmp/kernel-test.log 2>&1' _ "$TEST_FILE"
|
||||
sleep 2
|
||||
|
||||
# Build the CUTLASS NVFP4 GEMM C++ extension
|
||||
echo " Building CUTLASS C++ extension..."
|
||||
cd "$REPO_DIR/src/nvfp4_megamoe_kernel/cutlass_nvfp4_gemm"
|
||||
python setup.py install
|
||||
|
||||
# 4. Run the test
|
||||
echo "[4/4] Running layer test..."
|
||||
cd "$SCRIPT_DIR"
|
||||
python3 layertest.py
|
||||
|
||||
echo ""
|
||||
echo "=== Done ==="
|
||||
# Verify it started
|
||||
# Confirm the 'kernel-test' session exists after launch.
# The status of `screen -list` is neutralised so that, under `set -o pipefail`,
# only grep's match decides the outcome; otherwise a non-zero exit from
# `screen -list` (or an early pipe close by `grep -q`) could report FAIL even
# though the session is running.
if { screen -list || true; } | grep -q kernel-test; then
  echo "OK: screen 'kernel-test' is running"
else
  echo "FAIL: screen did not start. Check /tmp/kernel-test.log"
  # Show whatever the test managed to write; the log may not exist at all.
  cat /tmp/kernel-test.log 2>/dev/null
  exit 1
fi
|
||||
|
||||
Reference in New Issue
Block a user