[ROCm][CI] Fix flaky GPTQ compile correctness test (#38161)
Signed-off-by: Andreas Karatzas <akaratza@amd.com>
This commit is contained in:
@@ -137,6 +137,7 @@ def test_compile_correctness(
|
||||
all_args.append(
|
||||
final_args + [f"-cc.mode={mode.name}", "-cc.backend=inductor"]
|
||||
)
|
||||
all_envs.append({})
|
||||
|
||||
# inductor will change the output, so we only compare if the output
|
||||
# is close, not exactly the same.
|
||||
@@ -157,6 +158,5 @@ def test_compile_correctness(
|
||||
]:
|
||||
all_args.append(final_args + [f"-cc.mode={mode.name}", "-cc.backend=eager"])
|
||||
all_envs.append({})
|
||||
all_envs.append({})
|
||||
|
||||
compare_all_settings(model, all_args * 3, all_envs, method=method)
|
||||
compare_all_settings(model, all_args, all_envs, method=method)
|
||||
|
||||
@@ -1348,40 +1348,47 @@ def initialize_single_dummy_weight(
|
||||
high: float = 1e-3,
|
||||
seed: int = 1234,
|
||||
) -> None:
|
||||
if torch.is_floating_point(param):
|
||||
if current_platform.is_tpu():
|
||||
generator = torch.Generator(device="cpu")
|
||||
generator.manual_seed(seed)
|
||||
# Note: The param.uniform_ function cannot be used in this
|
||||
# context because it demands more TPU HBM than directly copying
|
||||
# from a CPU tensor.
|
||||
# Note: We avoid using torch.rand_like as it doesn't currently
|
||||
# support the generator argument.
|
||||
param.copy_(
|
||||
(high - low)
|
||||
* torch.rand(
|
||||
param.shape,
|
||||
generator=generator,
|
||||
dtype=param.dtype,
|
||||
layout=param.layout,
|
||||
requires_grad=param.requires_grad,
|
||||
device="cpu",
|
||||
)
|
||||
+ low
|
||||
)
|
||||
torch._sync(param)
|
||||
return
|
||||
if not torch.is_floating_point(param):
|
||||
if current_platform.is_rocm():
|
||||
# On ROCm, integer params (e.g. GPTQ qweight/qzeros) are left
|
||||
# as torch.empty() by default, giving non-deterministic values
|
||||
# across processes. Zero them for reproducibility.
|
||||
param.zero_()
|
||||
return
|
||||
|
||||
generator = torch.Generator(device=param.data.device)
|
||||
if current_platform.is_tpu():
|
||||
generator = torch.Generator(device="cpu")
|
||||
generator.manual_seed(seed)
|
||||
if torch.finfo(param.data.dtype).bits < 16:
|
||||
# uniform_ doesn't support < 16-bit datatypes (FP8)
|
||||
dtype = param.data.dtype
|
||||
tmp_param = param.data.to(torch.float16)
|
||||
tmp_param = tmp_param.uniform_(low, high, generator=generator).to(dtype)
|
||||
param.data.copy_(tmp_param)
|
||||
else:
|
||||
param.uniform_(low, high, generator=generator)
|
||||
# Note: The param.uniform_ function cannot be used in this
|
||||
# context because it demands more TPU HBM than directly copying
|
||||
# from a CPU tensor.
|
||||
# Note: We avoid using torch.rand_like as it doesn't currently
|
||||
# support the generator argument.
|
||||
param.copy_(
|
||||
(high - low)
|
||||
* torch.rand(
|
||||
param.shape,
|
||||
generator=generator,
|
||||
dtype=param.dtype,
|
||||
layout=param.layout,
|
||||
requires_grad=param.requires_grad,
|
||||
device="cpu",
|
||||
)
|
||||
+ low
|
||||
)
|
||||
torch._sync(param)
|
||||
return
|
||||
|
||||
generator = torch.Generator(device=param.data.device)
|
||||
generator.manual_seed(seed)
|
||||
if torch.finfo(param.data.dtype).bits < 16:
|
||||
# uniform_ doesn't support < 16-bit datatypes (FP8)
|
||||
dtype = param.data.dtype
|
||||
tmp_param = param.data.to(torch.float16)
|
||||
tmp_param = tmp_param.uniform_(low, high, generator=generator).to(dtype)
|
||||
param.data.copy_(tmp_param)
|
||||
else:
|
||||
param.uniform_(low, high, generator=generator)
|
||||
|
||||
|
||||
def maybe_remap_kv_scale_name(name: str, params_dict: dict) -> str | None:
|
||||
|
||||
Reference in New Issue
Block a user