fix: 16x256b.x1 load syntax — single address operand

This commit is contained in:
2026-05-28 23:02:23 +00:00
parent 44dcd6e8d0
commit fffb493b0e

View File

@@ -68,9 +68,9 @@ test_16x256b_loads(float* results) {
// Read column 0 — lane 0 should get rows 0-3, lane 1 should get rows 4-7, etc.
{
float v0, v1, v2, v3;
asm volatile("tcgen05.ld.sync.aligned.16x256b.x1.b32 {%0, %1, %2, %3}, [%4, %5];"
asm volatile("tcgen05.ld.sync.aligned.16x256b.x1.b32 {%0, %1, %2, %3}, [%4];"
: "=f"(v0), "=f"(v1), "=f"(v2), "=f"(v3)
: "r"(tb), "r"(0)); // column 0
: "r"(tb + 0)); // column 0
asm volatile("tcgen05.wait::ld.sync.aligned;");
load_count++;
@@ -96,9 +96,9 @@ test_16x256b_loads(float* results) {
// Read column 1 (2nd 16x256b.x1 load — does it crash?)
{
float v0, v1, v2, v3;
asm volatile("tcgen05.ld.sync.aligned.16x256b.x1.b32 {%0, %1, %2, %3}, [%4, %5];"
asm volatile("tcgen05.ld.sync.aligned.16x256b.x1.b32 {%0, %1, %2, %3}, [%4];"
: "=f"(v0), "=f"(v1), "=f"(v2), "=f"(v3)
: "r"(tb), "r"(1)); // column 1
: "r"(tb + 1)); // column 1
asm volatile("tcgen05.wait::ld.sync.aligned;");
load_count++;
@@ -113,9 +113,9 @@ test_16x256b_loads(float* results) {
// Read column 8 (9th column, zero-based — more 16x256b.x1 loads)
{
float v0, v1, v2, v3;
asm volatile("tcgen05.ld.sync.aligned.16x256b.x1.b32 {%0, %1, %2, %3}, [%4, %5];"
asm volatile("tcgen05.ld.sync.aligned.16x256b.x1.b32 {%0, %1, %2, %3}, [%4];"
: "=f"(v0), "=f"(v1), "=f"(v2), "=f"(v3)
: "r"(tb), "r"(8));
: "r"(tb + 8));
asm volatile("tcgen05.wait::ld.sync.aligned;");
load_count++;