[CI/Build] Avoid CUDA initialization (#8534)

commit 6ffa3f314c
parent e351572900
Author: Cyrus Leung
Date:   2024-09-18 18:38:11 +08:00
Committed by: GitHub

55 changed files with 256 additions and 256 deletions


@@ -9,7 +9,7 @@ from xformers.ops.fmha.attn_bias import BlockDiagonalCausalFromBottomRightMask
 from vllm.attention.backends.xformers import _make_alibi_bias
 from vllm.attention.ops.prefix_prefill import context_attention_fwd
-from vllm.utils import STR_DTYPE_TO_TORCH_DTYPE
+from vllm.utils import STR_DTYPE_TO_TORCH_DTYPE, seed_everything
 
 NUM_HEADS = [64]
 NUM_QUERIES_PER_KV = [1, 8, 64]
@@ -39,10 +39,7 @@ def test_contexted_kv_attention(
     kv_cache_dtype: str,
     device: str,
 ) -> None:
-    random.seed(0)
-    torch.manual_seed(0)
-    if torch.cuda.is_available():
-        torch.cuda.manual_seed(0)
+    seed_everything(0)
     torch.set_default_device(device)
 
     # Need this, otherwise when we capture the graph the process
@@ -237,10 +234,7 @@ def test_contexted_kv_attention_alibi(
     kv_cache_dtype: str,
     device: str,
 ) -> None:
-    random.seed(0)
-    torch.manual_seed(0)
-    if torch.cuda.is_available():
-        torch.cuda.manual_seed(0)
+    seed_everything(0)
     torch.set_default_device(device)
 
     # Need this, otherwise when we capture the graph the process
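
For context, seed_everything collapses the per-test seeding boilerplate into a
single call. Below is a minimal sketch of such a helper, assuming the usual
"seed every RNG in one call" pattern; it is not necessarily the exact
vllm.utils.seed_everything implementation.

    import random

    import numpy as np
    import torch


    def seed_everything(seed: int) -> None:
        """Seed the Python, NumPy, and Torch RNGs from one entry point."""
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        # Safe without a GPU: PyTorch documents that manual_seed_all is
        # silently ignored when CUDA is unavailable, and it is applied
        # lazily, so it does not force CUDA context creation the way the
        # per-test torch.cuda.is_available() checks removed above can.
        torch.cuda.manual_seed_all(seed)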