Refactor system architecture (#109)

2023-05-20 13:06:59 -07:00
parent 7297fa6f7c
commit c3442c1f6f
24 changed files with 1017 additions and 1034 deletions
--- a/cacheflow/model_executor/utils.py
+++ b/cacheflow/model_executor/utils.py
@@ -1,6 +1,5 @@
 """Utils for model executor."""
 import random
-from typing import Union

 import numpy as np
 import torch
@@ -9,28 +8,6 @@ from cacheflow.model_executor.parallel_utils.parallel_state import model_paralle
 from cacheflow.model_executor.parallel_utils.tensor_parallel import model_parallel_cuda_manual_seed


-_STR_DTYPE_TO_TORCH_DTYPE = {
-    "half": torch.half,
-    "float": torch.float,
-    "float16": torch.float16,
-    "float32": torch.float32,
-    "bfloat16": torch.bfloat16,
-}
-
-
-def get_torch_dtype(dtype: Union[torch.dtype, str]) -> torch.dtype:
-    if isinstance(dtype, str):
-        torch_dtype = _STR_DTYPE_TO_TORCH_DTYPE[dtype.lower()]
-    else:
-        torch_dtype = dtype
-    return torch_dtype
-
-
-def get_dtype_size(dtype: Union[torch.dtype, str]) -> int:
-    torch_dtype = get_torch_dtype(dtype)
-    return torch.tensor([], dtype=torch_dtype).element_size()
-
-
 def set_random_seed(seed: int) -> None:
    random.seed(seed)
    np.random.seed(seed)
@@ -40,15 +17,3 @@ def set_random_seed(seed: int) -> None:

    if model_parallel_is_initialized():
        model_parallel_cuda_manual_seed(seed)
-
-
-def get_cache_block_size(block_size: int,
-                         num_heads: int,
-                         head_size: int,
-                         num_layers: int,
-                         dtype: str) -> int:
-    key_cache_block = block_size * num_heads * head_size
-    value_cache_block = key_cache_block
-    total = num_layers * (key_cache_block + value_cache_block)
-    dtype_size = get_dtype_size(dtype)
-    return dtype_size * total