Files
nvfp4-megamoe-kernel/tests/unit/test_fmha_sm100.py

60 lines
1.4 KiB
Python

"""
Test: Compile and run standalone FMHA SM100 test via nvcc.
No PyTorch needed — pure CUDA runtime test.
"""
import subprocess
import sys
import os
def get_repo_root(start=None, marker='dsv4'):
    """Return the nearest ancestor directory that contains ``marker``.

    The search begins at ``start`` and walks towards the filesystem root,
    testing each directory in turn (the starting directory first).

    Args:
        start: Directory to begin searching from. ``None`` (the default)
            means the directory that holds this file.
        marker: Name of the file or directory whose presence identifies
            the repository root.

    Returns:
        The absolute path of the first directory containing ``marker``, or
        ``None`` if no directory up to and including the filesystem root
        contains it.
    """
    if start is None:
        start = os.path.dirname(os.path.abspath(__file__))
    current = os.path.abspath(start)
    while True:
        if os.path.exists(os.path.join(current, marker)):
            return current
        parent = os.path.dirname(current)
        # dirname() returns its own input once the filesystem root is
        # reached ('/' on POSIX, a drive root such as 'C:\\' on Windows).
        # Comparing against the parent instead of a literal '/' guarantees
        # termination on every platform.
        if parent == current:
            return None
        current = parent
# Root of the checkout: the nearest ancestor of this file containing 'dsv4'.
REPO = get_repo_root()
if REPO is None:
    # Without this check the None would be formatted into the source and
    # include paths below as the literal string "None", and the failure
    # would only show up later as a confusing nvcc error.
    print("❌ Could not locate the repository root "
          "(no ancestor directory contains 'dsv4')")
    sys.exit(1)
# Install prefix of the CUDA toolkit whose nvcc builds the test.
CUDA = "/usr/local/cuda-13.2"
# Step 1: Compile the standalone test with nvcc.
print("=" * 60)
print("Compiling standalone FMHA SM100 test...")
print("=" * 60)
src = f"{REPO}/tests/unit/test_fmha_sm100_standalone.cu"
out = "/tmp/fmha_sm100_standalone"
cmd = [
    f"{CUDA}/bin/nvcc",
    "--std=c++20",
    "-gencode=arch=compute_100a,code=sm_100a",
    f"-I{REPO}",
    "--expt-relaxed-constexpr",
    src,
    "-o", out,
    "-lcudart",
]
# Only the first few arguments are echoed to keep the log line short.
print(f"nvcc: {' '.join(cmd[:4])}...")
try:
    result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
except FileNotFoundError:
    # Raised when the nvcc executable itself does not exist at cmd[0].
    print(f"❌ Compilation FAILED:\nnvcc not found at {cmd[0]}")
    sys.exit(1)
except subprocess.TimeoutExpired:
    print("❌ Compilation FAILED:\nnvcc did not finish within 120 seconds")
    sys.exit(1)
if result.returncode != 0:
    # Show only the tail of stderr; the end of the compiler output is
    # what gets reported here.
    print(f"❌ Compilation FAILED:\n{result.stderr[-2000:]}")
    sys.exit(1)
print(f"✅ Compiled: {out}")
# Step 2: Run the compiled test binary and report its result.
print("\n" + "=" * 60)
print("Running standalone FMHA SM100 test...")
print("=" * 60)
try:
    result = subprocess.run([out], capture_output=True, text=True, timeout=90)
except subprocess.TimeoutExpired:
    print("\n❌ TEST FAILED (binary did not finish within 90 seconds)")
    sys.exit(1)
print(result.stdout)
if result.stderr:
    print(f"STDERR: {result.stderr[-500:]}")
print(f"Exit code: {result.returncode}")
if result.returncode != 0:
    print("\n❌ TEST FAILED")
    # Exit non-zero so callers (CI, shell scripts) see the failure; a
    # fixed 1 is used because the child's code may be negative when it
    # was killed by a signal.
    sys.exit(1)
print("\n✅ ALL TESTS PASSED!")