[NVIDIA] Support nvfp4 quantization (#12784)

This commit is contained in:
Kaixi Hou
2025-02-12 19:51:51 -08:00
committed by GitHub
parent 9f9704dca6
commit 4fc5c23bb6
12 changed files with 688 additions and 13 deletions

View File

@@ -1,16 +1,22 @@
#include "cuda_utils.h"
#ifdef USE_ROCM
#include <hip/hip_runtime.h>
#include <hip/hip_runtime_api.h>
#endif
int64_t get_device_attribute(int64_t attribute, int64_t device_id) {
int device, value;
if (device_id < 0) {
cudaGetDevice(&device);
} else {
device = device_id;
}
cudaDeviceGetAttribute(&value, static_cast<cudaDeviceAttr>(attribute),
device);
// Return the cached value on subsequent calls
static int value = [=]() {
int device = static_cast<int>(device_id);
if (device < 0) {
CUDA_CHECK(cudaGetDevice(&device));
}
int value;
CUDA_CHECK(cudaDeviceGetAttribute(
&value, static_cast<cudaDeviceAttr>(attribute), device));
return static_cast<int>(value);
}();
return value;
}