60 lines
1.4 KiB
Python
60 lines
1.4 KiB
Python
"""
Test: Compile and run standalone FMHA SM100 test via nvcc.
No PyTorch needed — pure CUDA runtime test.
"""
|
|
import subprocess
|
|
import sys
|
|
import os
|
|
|
|
def get_repo_root(start=None, marker='dsv4'):
    """Locate the repository root by walking up the directory tree.

    Starting from ``start`` (by default the directory containing this
    file), each directory on the way up is checked for an entry named
    ``marker``; the first directory that contains one is returned.

    Args:
        start: Directory to begin the search from. Defaults to the
            directory of this file.
        marker: Name of the file or directory whose presence identifies
            the repository root.

    Returns:
        The absolute path of the first matching directory, or ``None`` if
        no directory up to and including the filesystem root contains
        ``marker``.
    """
    if start is None:
        start = os.path.dirname(os.path.abspath(__file__))
    current = os.path.abspath(start)
    while True:
        if os.path.exists(os.path.join(current, marker)):
            return current
        parent = os.path.dirname(current)
        # dirname() of a filesystem root returns the root itself. Stopping
        # on "parent == current" (instead of comparing with the literal
        # '/') also terminates on roots that are not spelled '/'.
        if parent == current:
            return None
        current = parent
|
|
|
|
# Repository root as reported by get_repo_root(); used below to locate the
# CUDA test source and as an include path for nvcc.
REPO = get_repo_root()
if REPO is None:
    # Without this guard the paths below would be built from the literal
    # string "None" and only fail later with a confusing nvcc error.
    sys.exit("❌ Could not locate repository root (get_repo_root() returned None)")
# CUDA toolkit installation whose bin/nvcc is used to build the test.
CUDA = "/usr/local/cuda-13.2"
|
|
|
|
# Step 1: Compile standalone test
print("=" * 60)
print("Compiling standalone FMHA SM100 test...")
print("=" * 60)

# CUDA source to build and the path of the resulting executable.
src = f"{REPO}/tests/unit/test_fmha_sm100_standalone.cu"
out = "/tmp/fmha_sm100_standalone"

cmd = [
    f"{CUDA}/bin/nvcc",
    "--std=c++20",
    "-gencode=arch=compute_100a,code=sm_100a",
    f"-I{REPO}",
    "--expt-relaxed-constexpr",
    src,
    "-o", out,
    "-lcudart",
]

# Only the first few arguments are echoed to keep the log line short.
print(f"nvcc: {' '.join(cmd[:4])}...")
try:
    result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
except FileNotFoundError:
    # The nvcc executable does not exist at the configured path; report it
    # through the same failure path instead of an unhandled traceback.
    print(f"❌ Compilation FAILED:\nnvcc not found at {cmd[0]}")
    sys.exit(1)
except subprocess.TimeoutExpired:
    print("❌ Compilation FAILED:\nnvcc timed out after 120s")
    sys.exit(1)
if result.returncode != 0:
    # Show only the tail of stderr so a long error log stays readable.
    print(f"❌ Compilation FAILED:\n{result.stderr[-2000:]}")
    sys.exit(1)
print(f"✅ Compiled: {out}")
|
|
|
|
# Step 2: Run the test
print("\n" + "=" * 60)
print("Running standalone FMHA SM100 test...")
print("=" * 60)

try:
    result = subprocess.run([out], capture_output=True, text=True, timeout=90)
except subprocess.TimeoutExpired:
    # A hung test binary is a failure; report it and exit non-zero instead
    # of letting the exception escape as a traceback.
    print("Test binary timed out after 90s")
    print("\n❌ TEST FAILED")
    sys.exit(1)

print(result.stdout)
if result.stderr:
    # Only the tail of stderr is shown to keep the output short.
    print(f"STDERR: {result.stderr[-500:]}")
print(f"Exit code: {result.returncode}")

if result.returncode == 0:
    print("\n✅ ALL TESTS PASSED!")
else:
    print("\n❌ TEST FAILED")
    # Propagate the failure so callers (CI, shell scripts) see a non-zero
    # exit status rather than a success code alongside a failure message.
    sys.exit(1)
|