[Kernel][Hardware][AMD] Bf16 mfma opt for ROCm skinny GEMMs (#17071)

Signed-off-by: Hashem Hashemi <hashem.hashemi@amd.com>
Signed-off-by: charlifu <charlifu@amd.com>
Co-authored-by: charlifu <charlifu@amd.com>
This commit is contained in:
Hashem Hashemi
2025-05-07 22:34:49 -07:00
committed by GitHub
parent 6930a41116
commit 5a499e70d5
4 changed files with 321 additions and 233 deletions

View File

@@ -104,6 +104,7 @@ def device_id_to_physical_device_id(device_id: int) -> int:
return device_id
@cache
def on_mi250_mi300() -> bool:
    """Return True if the "cuda" device's arch name mentions gfx90a or gfx942.

    The check is a plain substring match against the ``gcnArchName``
    reported by ``torch.cuda.get_device_properties("cuda")``, so any extra
    text in that string does not prevent a match.

    NOTE(review): the function name suggests gfx90a/gfx942 correspond to
    MI250/MI300 parts -- that mapping is not established here; confirm.
    NOTE(review): ``cache`` is presumably ``functools.cache``, in which case
    the device is only queried on the first call -- confirm against the
    file's imports.
    """
    arch_name = torch.cuda.get_device_properties("cuda").gcnArchName
    for target_arch in ("gfx90a", "gfx942"):
        if target_arch in arch_name:
            return True
    return False