[Model] H2O Danube3-4b (#6451)
This commit is contained in:
@@ -28,7 +28,7 @@ NUM_HEADS = [(40, 40), (64, 8)] # Arbitrary values for testing
|
||||
|
||||
# FlashAttention forward only supports head dimension at most 128
|
||||
# https://github.com/ROCmSoftwarePlatform/flash-attention/blob/3d2b6f5d037782cc2c906909a46fb7e2e1b48b25/csrc/flash_attn_rocm/flash_api.cpp#L62
|
||||
HEAD_SIZES = [64, 80, 96, 112, 128, 192, 256
|
||||
HEAD_SIZES = [64, 80, 96, 112, 120, 128, 192, 256
|
||||
] if not is_hip() else [64, 80, 96, 112, 128]
|
||||
|
||||
BLOCK_SIZES = [16, 32]
|
||||
@@ -134,6 +134,8 @@ def test_paged_attention(
|
||||
seed: int,
|
||||
device: str,
|
||||
) -> None:
|
||||
if kv_cache_dtype == "fp8" and head_size % 16:
|
||||
pytest.skip()
|
||||
random.seed(seed)
|
||||
torch.random.manual_seed(seed)
|
||||
if torch.cuda.is_available():
|
||||
|
||||
Reference in New Issue
Block a user