Refactor system architecture (#109)

This commit is contained in:
Woosuk Kwon
2023-05-20 13:06:59 -07:00
committed by GitHub
parent 7297fa6f7c
commit c3442c1f6f
24 changed files with 1017 additions and 1034 deletions

View File

@@ -10,9 +10,9 @@ from cacheflow import cache_ops
from cacheflow import pos_encoding_ops
from cacheflow.model_executor.input_metadata import InputMetadata
_SUPPORTED_HEAD_SIZES = [32, 64, 80, 96, 128, 160, 192, 256]
class GPTCacheFlowAttention(nn.Module):
"""GPT-style multi-head attention.