FastAPI-based working frontend (#10)

This commit is contained in:
Zhuohan Li
2023-03-29 14:48:56 +08:00
committed by GitHub
parent d359cda5fa
commit 721fa3df15
15 changed files with 536 additions and 146 deletions

View File

@@ -44,11 +44,14 @@ def get_memory_analyzer(
     model_name: str,
     block_size: int,
     dtype: Union[torch.dtype, str],
+    gpu_memory: int,
+    cpu_memory: int,
     tensor_parallel_size: int = 1,
 ) -> CacheFlowMemoryAnalyzer:
     torch_dtype = get_torch_dtype(dtype)
     for model_class, memory_analyzer in _MEMORY_ANALYZERS.items():
         if model_class in model_name:
             return memory_analyzer(
-                model_name, block_size, torch_dtype, tensor_parallel_size)
+                model_name, block_size, torch_dtype, gpu_memory, cpu_memory,
+                tensor_parallel_size)
     raise ValueError(f'Unsupported model name: {model_name}')