P6: Fix host-side BF16 conversion in test
This commit is contained in:
@@ -53,10 +53,12 @@ int main() {
|
||||
auto init_bf16 = [](bf16_t* d, int n) {
|
||||
float* h = new float[n];
|
||||
for (int i = 0; i < n; i++) h[i] = (float)rand() / RAND_MAX - 0.5f;
|
||||
// Use host-side BF16 conversion
|
||||
for (int i = 0; i < n; i++) {
|
||||
unsigned short us;
|
||||
asm("cvt.rn.bf16.f32 %0, %1;" : "=h"(us) : "f"(h[i]));
|
||||
d[i] = us;
|
||||
uint32_t u;
|
||||
memcpy(&u, &h[i], 4);
|
||||
u = u >> 16; // truncate FP32 to BF16 (rough but sufficient for test)
|
||||
d[i] = (bf16_t)(u & 0xFFFF);
|
||||
}
|
||||
delete[] h;
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user