[Feature][ROCm]Enable fusion pass for torch.compile on ROCm (#15050)

Signed-off-by: charlifu <charlifu@amd.com>
This commit is contained in:
Charlie Fu
2025-03-31 06:42:18 -05:00
committed by GitHub
parent effc5d24fa
commit e85829450d
8 changed files with 92 additions and 72 deletions

View File

@@ -33,8 +33,8 @@ static __device__ __forceinline__ int8_t float_to_int8_rn(float const x) {
template <typename fp8_type>
static __device__ __forceinline__ fp8_type float_to_fp8(float const x) {
float const r = fmax(-fp8_e4m3_adjusted_max_v<fp8_type>,
fmin(x, fp8_e4m3_adjusted_max_v<fp8_type>));
float const r =
fmax(-quant_type_max_v<fp8_type>, fmin(x, quant_type_max_v<fp8_type>));
return static_cast<fp8_type>(r);
}