test: cleanup TS MMA test

This commit is contained in:
2026-05-28 13:38:07 +00:00
parent a767e90a12
commit dc2130cb12

View File

@@ -21,7 +21,7 @@ using namespace dsv4::kernels::attention;
constexpr int BLOCK_MN = 128;
__global__ void __launch_bounds__(128)
test_mma_ts(float* o_out)
test_mma_ts()
{
const int tid = threadIdx.x, wid = tid / 32, lane = tid % 32;
@@ -117,16 +117,12 @@ test_mma_ts(float* o_out)
// Host entry point for the minimal tcgen05.mma TS test: launches test_mma_ts on a
// single 128-thread block, waits for the device, and prints either the CUDA error
// (returning 1) or a completion message (returning 0).
// NOTE(review): this listing appears to be a diff hunk rendered without +/- markers,
// so statements from both sides of the commit may be present (e.g. the two launches
// and the two completion messages) — confirm against the real file.
int main() {
printf("=== Minimal tcgen05.mma TS Test ===\n");
// Device buffer of 16 floats, passed to the launch that takes an argument and
// released with cudaFree below.
// NOTE(review): the cudaMalloc return code is not checked.
float* d_out;
cudaMalloc(&d_out, 16 * sizeof(float));
// Dynamic shared-memory size in bytes: the summed terms (788) are rounded up to a
// multiple of 128 by adding 127 and clearing the low 7 bits, which yields 896.
// The meaning of the individual terms is not visible here — TODO document them.
int smem = (4 + 16 + 16*16*2 + 256 + 127) & ~127;
// One block of 128 threads, matching the kernel's __launch_bounds__(128).
test_mma_ts<<<1, 128, smem>>>(d_out);
test_mma_ts<<<1, 128, smem>>>();
// Block until the device has finished; a non-success status is printed and main
// returns 1.
// NOTE(review): no cudaGetLastError() follows the launch — consider adding one so
// launch-configuration errors are reported explicitly.
cudaError_t err = cudaDeviceSynchronize();
if (err != cudaSuccess) { printf("CUDA ERROR: %s\n", cudaGetErrorString(err)); return 1; }
printf("Test completed successfully!\n");
cudaFree(d_out);
printf("Kernel completed!\n");
return 0;
}