test: cleanup TS MMA test
This commit is contained in:
@@ -21,7 +21,7 @@ using namespace dsv4::kernels::attention;
|
||||
constexpr int BLOCK_MN = 128;
|
||||
|
||||
__global__ void __launch_bounds__(128)
|
||||
test_mma_ts(float* o_out)
|
||||
test_mma_ts()
|
||||
{
|
||||
const int tid = threadIdx.x, wid = tid / 32, lane = tid % 32;
|
||||
|
||||
@@ -117,16 +117,12 @@ test_mma_ts(float* o_out)
|
||||
int main() {
|
||||
printf("=== Minimal tcgen05.mma TS Test ===\n");
|
||||
|
||||
float* d_out;
|
||||
cudaMalloc(&d_out, 16 * sizeof(float));
|
||||
|
||||
int smem = (4 + 16 + 16*16*2 + 256 + 127) & ~127;
|
||||
test_mma_ts<<<1, 128, smem>>>(d_out);
|
||||
test_mma_ts<<<1, 128, smem>>>();
|
||||
|
||||
cudaError_t err = cudaDeviceSynchronize();
|
||||
if (err != cudaSuccess) { printf("CUDA ERROR: %s\n", cudaGetErrorString(err)); return 1; }
|
||||
|
||||
printf("Test completed successfully!\n");
|
||||
cudaFree(d_out);
|
||||
printf("Kernel completed!\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user