[Hardware] using current_platform.seed_everything (#9785)

Signed-off-by: wangshuai09 <391746016@qq.com>
This commit is contained in:
wangshuai09
2024-10-29 22:47:44 +08:00
committed by GitHub
parent 09500f7dde
commit 622b7ab955
27 changed files with 104 additions and 105 deletions

View File

@@ -8,7 +8,7 @@ from vllm import _custom_ops as ops
from vllm.attention.ops.blocksparse_attention.interface import (
LocalStridedBlockSparseAttn)
from vllm.platforms import current_platform
from vllm.utils import get_max_shared_memory_bytes, seed_everything
from vllm.utils import get_max_shared_memory_bytes
from .allclose_default import get_default_atol, get_default_rtol
@@ -173,7 +173,7 @@ def test_paged_attention(
blocksparse_block_size: int,
blocksparse_head_sliding_step: int,
) -> None:
seed_everything(seed)
current_platform.seed_everything(seed)
torch.set_default_device(device)
scale = float(1.0 / (head_size**0.5))
num_query_heads, num_kv_heads = num_heads
@@ -384,7 +384,7 @@ def test_varlen_blocksparse_attention_prefill(
seed: int,
device: str,
) -> None:
seed_everything(seed)
current_platform.seed_everything(seed)
torch.set_default_device(device)
# MAX_SEQ_LEN sometimes causes OOM in the reference implementation.
# As the xformers library is already tested with its own tests, we can use