Refactor system architecture (#109)
This commit is contained in:
@@ -1,6 +1,5 @@
|
||||
"""Utils for model executor."""
|
||||
import random
|
||||
from typing import Union
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
@@ -9,28 +8,6 @@ from cacheflow.model_executor.parallel_utils.parallel_state import model_paralle
|
||||
from cacheflow.model_executor.parallel_utils.tensor_parallel import model_parallel_cuda_manual_seed
|
||||
|
||||
|
||||
_STR_DTYPE_TO_TORCH_DTYPE = {
|
||||
"half": torch.half,
|
||||
"float": torch.float,
|
||||
"float16": torch.float16,
|
||||
"float32": torch.float32,
|
||||
"bfloat16": torch.bfloat16,
|
||||
}
|
||||
|
||||
|
||||
def get_torch_dtype(dtype: Union[torch.dtype, str]) -> torch.dtype:
|
||||
if isinstance(dtype, str):
|
||||
torch_dtype = _STR_DTYPE_TO_TORCH_DTYPE[dtype.lower()]
|
||||
else:
|
||||
torch_dtype = dtype
|
||||
return torch_dtype
|
||||
|
||||
|
||||
def get_dtype_size(dtype: Union[torch.dtype, str]) -> int:
|
||||
torch_dtype = get_torch_dtype(dtype)
|
||||
return torch.tensor([], dtype=torch_dtype).element_size()
|
||||
|
||||
|
||||
def set_random_seed(seed: int) -> None:
|
||||
random.seed(seed)
|
||||
np.random.seed(seed)
|
||||
@@ -40,15 +17,3 @@ def set_random_seed(seed: int) -> None:
|
||||
|
||||
if model_parallel_is_initialized():
|
||||
model_parallel_cuda_manual_seed(seed)
|
||||
|
||||
|
||||
def get_cache_block_size(block_size: int,
|
||||
num_heads: int,
|
||||
head_size: int,
|
||||
num_layers: int,
|
||||
dtype: str) -> int:
|
||||
key_cache_block = block_size * num_heads * head_size
|
||||
value_cache_block = key_cache_block
|
||||
total = num_layers * (key_cache_block + value_cache_block)
|
||||
dtype_size = get_dtype_size(dtype)
|
||||
return dtype_size * total
|
||||
|
||||
Reference in New Issue
Block a user