diff --git a/deep_gemm/include/deep_gemm/impls/sm100_fp8_gemm_1d2d.cuh b/deep_gemm/include/deep_gemm/impls/sm100_fp8_gemm_1d2d.cuh
index 455e600..e04db3c 100644
--- a/deep_gemm/include/deep_gemm/impls/sm100_fp8_gemm_1d2d.cuh
+++ b/deep_gemm/include/deep_gemm/impls/sm100_fp8_gemm_1d2d.cuh
@@ -513,13 +513,11 @@ sm100_fp8_gemm_1d2d_impl(float* sfb, int* grouped_layout,
         }
 
         // Flush all stages in the pipeline to make TMA stores visible to the next kernel
-        // TODO: do we actually need this?
         if (epilogue_thread_idx_in_warpgroup == 0)
             cute::tma_store_wait<0>();
 
         // Deallocate tensor memory by warp 1
         // NOTES: warp 0 is waiting TMA store
-        // TODO: do we need 2 SM allocation?
         if (epilogue_warp_idx == 1)
             Allocator().free(0, kNumTmemCols);
     }