Fix integer overflows in attention & cache ops (#1514)

This commit is contained in:
Woosuk Kwon
2023-10-31 15:19:30 -07:00
committed by GitHub
parent 9cabcb7645
commit 0ce8647dc5
5 changed files with 53 additions and 47 deletions

View File

@@ -13,7 +13,7 @@ FLOAT32_BYTES = torch.finfo(torch.float).bits // 8
# This will change depending on the compute capability.
# - 512 as a buffer
MAX_SEQ_LEN = get_max_shared_memory_bytes() // FLOAT32_BYTES - 512
-NUM_BLOCKS = 128  # Arbitrary values for testing
+NUM_BLOCKS = 40000  # Arbitrary values for testing
PARTITION_SIZE = 512
DTYPES = [torch.half, torch.bfloat16, torch.float]