P6: Fix host-side BF16 conversion in test

This commit is contained in:
2026-05-30 17:00:51 +00:00
parent 1a87e054db
commit a88b321433

View File

@@ -53,10 +53,12 @@ int main() {
auto init_bf16 = [](bf16_t* d, int n) {
float* h = new float[n];
for (int i = 0; i < n; i++) h[i] = (float)rand() / RAND_MAX - 0.5f;
// Use host-side BF16 conversion
for (int i = 0; i < n; i++) {
unsigned short us;
asm("cvt.rn.bf16.f32 %0, %1;" : "=h"(us) : "f"(h[i]));
d[i] = us;
uint32_t u;
memcpy(&u, &h[i], 4);
u = u >> 16; // truncate FP32 to BF16 (rough but sufficient for test)
d[i] = (bf16_t)(u & 0xFFFF);
}
delete[] h;
};