test: clean minimal nvvm.inline_ptx test

This commit is contained in:
2026-05-28 04:45:21 +00:00
parent 882d48588b
commit eebf33b97d

View File

@@ -0,0 +1,40 @@
"""Minimal nvvm.inline_ptx test - no debug env vars."""
import torch
import cutlass.cute as cute
import cutlass.torch as cutlass_torch
from cutlass.cutlass_dsl import dsl_user_op, T
from cutlass._mlir.dialects import nvvm
from cutlass.cute.typing import Float32, Int32
@dsl_user_op
def f32_to_i32_rni(x: Float32, *, loc=None, ip=None) -> Int32:
    """Convert a float32 value to int32 via the PTX ``cvt.rni.s32.f32`` instruction.

    The conversion is emitted as an ``nvvm.inline_ptx`` op with one i32
    write-only result and one f32 read-only operand.

    Args:
        x: Value to convert; it is wrapped in ``Float32`` before lowering.
        loc: Optional MLIR source location forwarded to the emitted ops.
        ip: Optional MLIR insertion point forwarded to the emitted ops.

    Returns:
        The inline-PTX result wrapped as ``Int32``.
    """
    # Materialize the operand as an MLIR value first so the op call stays flat.
    operand = Float32(x).ir_value(loc=loc, ip=ip)
    # NOTE(review): presumably $0 binds to the write-only result and $1 to the
    # read-only operand, matching the order in the PTX string - confirm
    # against the NVVM dialect version in use.
    converted = nvvm.inline_ptx(
        write_only_args=[T.i32()],
        read_only_args=[operand],
        ptx_code="cvt.rni.s32.f32 $0, $1;",
        loc=loc,
        ip=ip,
    )
    return Int32(converted)
@cute.kernel
def test_k(inp: cute.Tensor, out: cute.Tensor):
    """Kernel: thread 0 converts the first element of ``inp`` into ``out``.

    Thread 0 (x dimension) loads one ``Float32`` through ``inp.iterator``,
    converts it with ``f32_to_i32_rni`` and stores the result through
    ``out.iterator``.  All other threads do nothing.
    """
    # Only the x component of the thread index is needed.
    thread_x = cute.arch.thread_idx()[0]
    if thread_x == Int32(0):
        value = cute.arch.load(inp.iterator, Float32)
        rounded = f32_to_i32_rni(value)
        cute.arch.store(out.iterator, rounded)
@cute.jit
def launch_test_k(inp: cute.Tensor, out: cute.Tensor):
    """Host-side entry point that launches ``test_k``.

    ``cute.compile`` is given this ``@cute.jit`` function instead of the
    ``@cute.kernel`` itself so that the kernel is launched with an explicit
    grid/block configuration.  Only thread 0 does any work in ``test_k``, so
    a single block with a single thread is sufficient.
    """
    test_k(inp, out).launch(grid=[1, 1, 1], block=[1, 1, 1])


def main() -> None:
    """Compile and run the conversion kernel on one value and verify it.

    Raises:
        SystemExit: If the value written by the kernel differs from the
            expected result, so the script exits with a non-zero status.
    """
    expected = 4  # 3.7 rounded to the nearest integer

    x = torch.tensor([3.7], dtype=torch.float32, device='cuda')
    o = torch.zeros(1, dtype=torch.int32, device='cuda')
    xc = cutlass_torch.from_dlpack(x).mark_layout_dynamic(leading_dim=0)
    oc = cutlass_torch.from_dlpack(o).mark_layout_dynamic(leading_dim=0)

    print("Compiling...")
    compiled = cute.compile(launch_test_k, xc, oc)
    print("Running...")
    compiled(xc, oc)
    # Wait for the kernel explicitly so that any launch/execution error is
    # reported here rather than at the later read-back.
    torch.cuda.synchronize()

    result = o.item()
    print(f"Result: {result} (expected {expected})")
    # A test must be able to fail: previously the value was only printed.
    if result != expected:
        raise SystemExit(f"FAIL: got {result}, expected {expected}")


if __name__ == "__main__":
    main()