fix: 16x256b.x1 load syntax — single address operand
This commit is contained in:
@@ -68,9 +68,9 @@ test_16x256b_loads(float* results) {
|
||||
// Read column 0 — lane 0 should get rows 0-3, lane 1 should get rows 4-7, etc.
|
||||
{
|
||||
float v0, v1, v2, v3;
|
||||
asm volatile("tcgen05.ld.sync.aligned.16x256b.x1.b32 {%0, %1, %2, %3}, [%4, %5];"
|
||||
asm volatile("tcgen05.ld.sync.aligned.16x256b.x1.b32 {%0, %1, %2, %3}, [%4];"
|
||||
: "=f"(v0), "=f"(v1), "=f"(v2), "=f"(v3)
|
||||
: "r"(tb), "r"(0)); // column 0
|
||||
: "r"(tb + 0)); // column 0
|
||||
asm volatile("tcgen05.wait::ld.sync.aligned;");
|
||||
load_count++;
|
||||
|
||||
@@ -96,9 +96,9 @@ test_16x256b_loads(float* results) {
|
||||
// Read column 1 (2nd 16x256b.x1 load — does it crash?)
|
||||
{
|
||||
float v0, v1, v2, v3;
|
||||
asm volatile("tcgen05.ld.sync.aligned.16x256b.x1.b32 {%0, %1, %2, %3}, [%4, %5];"
|
||||
asm volatile("tcgen05.ld.sync.aligned.16x256b.x1.b32 {%0, %1, %2, %3}, [%4];"
|
||||
: "=f"(v0), "=f"(v1), "=f"(v2), "=f"(v3)
|
||||
: "r"(tb), "r"(1)); // column 1
|
||||
: "r"(tb + 1)); // column 1
|
||||
asm volatile("tcgen05.wait::ld.sync.aligned;");
|
||||
load_count++;
|
||||
|
||||
@@ -113,9 +113,9 @@ test_16x256b_loads(float* results) {
|
||||
// Read column 8 (9th column, counting from column 0 — more 16x256b.x1 loads)
|
||||
{
|
||||
float v0, v1, v2, v3;
|
||||
asm volatile("tcgen05.ld.sync.aligned.16x256b.x1.b32 {%0, %1, %2, %3}, [%4, %5];"
|
||||
asm volatile("tcgen05.ld.sync.aligned.16x256b.x1.b32 {%0, %1, %2, %3}, [%4];"
|
||||
: "=f"(v0), "=f"(v1), "=f"(v2), "=f"(v3)
|
||||
: "r"(tb), "r"(8));
|
||||
: "r"(tb + 8));
|
||||
asm volatile("tcgen05.wait::ld.sync.aligned;");
|
||||
load_count++;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user