From dc9596c6bcbea74c2157af17866a640edd98b521 Mon Sep 17 00:00:00 2001 From: biondizzle Date: Thu, 28 May 2026 04:46:45 +0000 Subject: [PATCH] =?UTF-8?q?test:=20sub-process=20isolation=20for=20each=20?= =?UTF-8?q?f32=E2=86=92i32=20approach?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/unit/test_ptx_runner.py | 18 ++++++++ tests/unit/test_ptx_subproc.py | 76 ++++++++++++++++++++++++++++++++++ 2 files changed, 94 insertions(+) create mode 100644 tests/unit/test_ptx_runner.py create mode 100644 tests/unit/test_ptx_subproc.py diff --git a/tests/unit/test_ptx_runner.py b/tests/unit/test_ptx_runner.py new file mode 100644 index 00000000..9fc9bf3b --- /dev/null +++ b/tests/unit/test_ptx_runner.py @@ -0,0 +1,18 @@ +"""Runner: test each f32→i32 approach in a separate process.""" +import subprocess +import sys + +for approach in ["arith", "nvvm_ptx", "nvvm_multiline"]: + print(f"\n{'='*50}") + print(f"Testing: {approach}") + print(f"{'='*50}") + result = subprocess.run( + [sys.executable, "test_ptx_subproc.py", approach], + capture_output=True, + text=True, + timeout=120, + ) + print(result.stdout) + if result.stderr: + print(f"STDERR: {result.stderr[:500]}") + print(f"Exit code: {result.returncode}") diff --git a/tests/unit/test_ptx_subproc.py b/tests/unit/test_ptx_subproc.py new file mode 100644 index 00000000..3d7dc548 --- /dev/null +++ b/tests/unit/test_ptx_subproc.py @@ -0,0 +1,76 @@ +"""Test: run each approach in a separate process to survive LLVM ERROR.""" +import torch +import cutlass.cute as cute +import cutlass.torch as cutlass_torch +import sys + +approach = sys.argv[1] if len(sys.argv) > 1 else "arith" + +if approach == "arith": + from cutlass.cutlass_dsl import dsl_user_op, T + from cutlass._mlir.dialects import arith + from cutlass.cute.typing import Float32, Int32 + + @dsl_user_op + def f32_to_i32(x: Float32, *, loc=None, ip=None) -> Int32: + return Int32( + arith.fptosi(T.i32(), Float32(x).ir_value(loc=loc, ip=ip), loc=loc, ip=ip) + ) + +elif approach == "nvvm_ptx": + from cutlass.cutlass_dsl import dsl_user_op, T + from cutlass._mlir.dialects import nvvm + from cutlass.cute.typing import Float32, Int32 + + @dsl_user_op + def f32_to_i32(x: Float32, *, loc=None, ip=None) -> Int32: + result = nvvm.inline_ptx( + write_only_args=[T.i32()], + read_only_args=[Float32(x).ir_value(loc=loc, ip=ip)], + ptx_code="cvt.rni.s32.f32 $0, $1;", + loc=loc, + ip=ip, + ) + return Int32(result) + +elif approach == "nvvm_multiline": + from cutlass.cutlass_dsl import dsl_user_op, T + from cutlass._mlir.dialects import nvvm + from cutlass.cute.typing import Float32, Int32 + + @dsl_user_op + def f32_to_i32(x: Float32, *, loc=None, ip=None) -> Int32: + result = nvvm.inline_ptx( + write_only_args=[T.i32()], + read_only_args=[Float32(x).ir_value(loc=loc, ip=ip)], + ptx_code="{\n\tcvt.rni.s32.f32 $0, $1;\n\t}", + loc=loc, + ip=ip, + ) + return Int32(result) + +else: + print(f"Unknown approach: {approach}") + sys.exit(1) + + +@cute.kernel +def test_k(inp: cute.Tensor, out: cute.Tensor): + tidx, _, _ = cute.arch.thread_idx() + if tidx == Int32(0): + x = cute.arch.load(inp.iterator, Float32) + r = f32_to_i32(x) + cute.arch.store(out.iterator, r) + + +if __name__ == "__main__": + x = torch.tensor([3.7], dtype=torch.float32, device='cuda') + o = torch.zeros(1, dtype=torch.int32, device='cuda') + xc = cutlass_torch.from_dlpack(x).mark_layout_dynamic(leading_dim=0) + oc = cutlass_torch.from_dlpack(o).mark_layout_dynamic(leading_dim=0) + print(f"Approach: {approach}") + print("Compiling...") + compiled = cute.compile(test_k, xc, oc) + print("Running...") + compiled(xc, oc) + print(f"Result: {o.item()} (expected 4)")