tests/layertest.py:
- Loads layer 0 expert weights from both the original (MXFP4) and NVFP4 checkpoints
- Dequantizes both to BF16 for reference comparison
- Runs the MoE forward pass in pure BF16 (no kernel)
- Runs the same forward pass through our NVFP4 CUTLASS kernel
- Compares cosine similarity: kernel vs. BF16 reference

tests/run_test.sh:
- Creates a venv, installs deps, builds the kernel from source, runs the test

Isolates our kernel completely from vLLM's weight loading, tensor parallelism, and MoE routing. If cosine ≈ 1.0, the bug is in vLLM. If cosine ≈ 0, the bug is in our kernel pipeline.
42 lines
985 B
Bash
Executable File
42 lines
985 B
Bash
Executable File
#!/bin/bash
#
# Setup and run the layer test on B200 — no Docker, no vLLM.
# Compiles the kernel raw and runs the comparison test.
#
# Usage: run this script directly; it takes no arguments.
#
# Steps:
#   1. Create a virtualenv at <repo>/tests/.venv (reused when already usable).
#   2. Install the Python dependencies listed in the requirements.txt that
#      sits next to this script.
#   3. Build and install the kernel from the repo root as an editable package.
#   4. Run layertest.py from this script's directory.

# Abort on any failed command, unset variable, or failed pipeline stage.
set -euo pipefail

# Resolve every path from the script's own location so the script behaves the
# same regardless of the caller's working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_DIR="$(dirname "$SCRIPT_DIR")"
VENV_DIR="$REPO_DIR/tests/.venv"

echo "=== NVFP4 Layer Test Setup ==="
echo "Repo: $REPO_DIR"
echo ""

# 1. Create venv
# Probe for the activate script rather than the bare directory: an interrupted
# `python3 -m venv` can leave the directory behind without bin/activate, and a
# directory-only check would then skip creation forever and fail at `source`.
if [[ ! -f "$VENV_DIR/bin/activate" ]]; then
  echo "[1/4] Creating venv..."
  python3 -m venv "$VENV_DIR"
else
  echo "[1/4] Venv already exists, skipping"
fi

# The activate script only exists at runtime, so ShellCheck cannot follow it.
# shellcheck source=/dev/null
source "$VENV_DIR/bin/activate"

# 2. Install dependencies
# Invoke pip as `python3 -m pip` so it is always the pip belonging to the
# activated venv's interpreter; this is also the form pip documents for
# upgrading itself.
echo "[2/4] Installing Python deps..."
python3 -m pip install --upgrade pip -q
python3 -m pip install -r "$SCRIPT_DIR/requirements.txt" -q

# 3. Build and install the kernel from source
echo "[3/4] Building kernel from source (this takes a few minutes)..."
cd "$REPO_DIR"
# --no-build-isolation builds in the current environment instead of an
# isolated one, so the build requirements must already be installed
# (presumably by step 2's requirements.txt — confirm).
python3 -m pip install -e . --no-build-isolation

# 4. Run the test
# Run from the script directory so layertest.py is found by its relative name.
echo "[4/4] Running layer test..."
cd "$SCRIPT_DIR"
python3 layertest.py

echo ""
echo "=== Done ==="