[Build] Avoid building too many extensions (#1624)

This commit is contained in:
Yanming W
2023-11-23 16:31:19 -08:00
committed by GitHub
parent de23687d16
commit e0c6f556e8
25 changed files with 206 additions and 272 deletions

View File

@@ -6,7 +6,7 @@ import torch
from xformers import ops as xops
from xformers.ops.fmha.attn_bias import BlockDiagonalCausalMask
-from vllm import attention_ops
+from vllm._C import ops
from vllm.utils import get_max_shared_memory_bytes
FLOAT32_BYTES = torch.finfo(torch.float).bits // 8
@@ -165,7 +165,7 @@ def test_paged_attention(
# Call the paged attention kernel.
output = torch.empty_like(query)
if version == "v1":
-attention_ops.paged_attention_v1(
+ops.paged_attention_v1(
output,
query,
key_cache,
@@ -194,7 +194,7 @@ def test_paged_attention(
device=output.device,
)
max_logits = torch.empty_like(exp_sums)
-attention_ops.paged_attention_v2(
+ops.paged_attention_v2(
output,
exp_sums,
max_logits,