[misc][cuda] use nvml to avoid accidental cuda initialization (#6007)
This commit is contained in:
@@ -2,13 +2,13 @@ import math
|
||||
|
||||
import torch
|
||||
|
||||
from vllm.utils import is_cpu, is_hip
|
||||
from vllm.utils import get_device_capability_stateless, is_cpu, is_hip
|
||||
|
||||
from .utils import (dense_to_crow_col, get_head_sliding_step,
|
||||
get_sparse_attn_mask)
|
||||
|
||||
IS_COMPUTE_8_OR_ABOVE = (torch.cuda.is_available()
|
||||
and torch.cuda.get_device_capability()[0] >= 8)
|
||||
and get_device_capability_stateless()[0] >= 8)
|
||||
|
||||
if IS_COMPUTE_8_OR_ABOVE:
|
||||
from .blocksparse_attention_kernel import blocksparse_flash_attn_varlen_fwd
|
||||
@@ -235,4 +235,4 @@ class LocalStridedBlockSparseAttn(torch.nn.Module):
|
||||
v,
|
||||
cu_seqlens_k,
|
||||
cu_seqlens_q=cu_seqlens_q,
|
||||
sm_scale=sm_scale)
|
||||
sm_scale=sm_scale)
|
||||
|
||||
Reference in New Issue
Block a user