P6: Fix host-side BF16→FP32 conversion in test
This commit is contained in:
@@ -153,7 +153,13 @@ int main() {
|
||||
|
||||
float* f_direct = new float[n_h * HD];
|
||||
float* f_tma = new float[n_h * HD];
|
||||
auto b2f = [](bf16_t h) -> float { float f; asm("cvt.f32.bf16 %0, %1;" : "=f"(f) : "h"(h)); return f; };
|
||||
// Host-side BF16 -> FP32 conversion done purely with integer bit manipulation:
// the 16 bits taken from `h` become the upper half of a 32-bit word (lower
// half zero), and that word is then reinterpreted as a float.
// NOTE(review): assumes bf16_t is an integer-like alias carrying the raw BF16
// bit pattern, so that the assignment below copies bits rather than converting
// a numeric value -- confirm against the bf16_t declaration.
auto b2f = [](bf16_t h) -> float {
    const unsigned short raw = h;
    // Widen BEFORE shifting. An unsigned short is promoted to (signed) int, so
    // `raw << 16` would shift into the sign bit whenever the top bit of `raw`
    // is set (i.e. for every negative BF16 value).
    const unsigned int bits = static_cast<unsigned int>(raw) << 16;
    static_assert(sizeof(float) == sizeof(bits),
                  "b2f requires float and unsigned int to have the same size");
    float f;
    // memcpy is the aliasing-safe way to reinterpret the bit pattern.
    memcpy(&f, &bits, sizeof(f));
    return f;
};
|
||||
for (int i = 0; i < n_h * HD; i++) {
|
||||
f_direct[i] = b2f(h_o_direct[i]);
|
||||
f_tma[i] = b2f(h_o_tma[i]);
|
||||
|
||||
Reference in New Issue
Block a user