diff --git a/tests/unit/test_prefill_t2_debug.cu b/tests/unit/test_prefill_t2_debug.cu index b4d8e523..d34acebf 100644 --- a/tests/unit/test_prefill_t2_debug.cu +++ b/tests/unit/test_prefill_t2_debug.cu @@ -332,7 +332,7 @@ __global__ void prefill_t2_debug_kernel( // Read PV result for row qr asm volatile("fence.sc.gpu;" ::: "memory"); __syncthreads(); - prefill_read_pv_all_subs(tb, qr, sOacc, p_rescale); + prefill_read_pv_all_subs<512, 32>(tb, qr, sOacc, p_rescale); __syncthreads(); // Print first few accumulated values @@ -378,7 +378,7 @@ int main() { constexpr int HD = 512; constexpr int NOPE = 448; constexpr int ROPE = 64; - constexpr float scale = 1.0f / sqrtf((float)HD); + const float scale = 1.0f / sqrtf((float)HD); printf("=== Prefill T=2 Debug Test ===\n"); printf("T=%d N=%d HD=%d NOPE=%d ROPE=%d scale=%.6f\n", T, N, HD, NOPE, ROPE, scale);