[CI/Build] mypy: Resolve some errors from checking vllm/engine (#9267)

Signed-off-by: Russell Bryant <rbryant@redhat.com>
This commit is contained in:
Russell Bryant
2024-10-16 18:55:59 -04:00
committed by GitHub
parent 8345045833
commit 776dbd74f1
20 changed files with 109 additions and 74 deletions

View File

@@ -626,13 +626,14 @@ class CacheConfig:
self.sliding_window = sliding_window
self.enable_prefix_caching = enable_prefix_caching
self.cpu_offload_gb = cpu_offload_gb
self._verify_args()
self._verify_cache_dtype()
self._verify_prefix_caching()
# Will be set after profiling.
- self.num_gpu_blocks = None
- self.num_cpu_blocks = None
+ self.num_gpu_blocks: Optional[int] = None
+ self.num_cpu_blocks: Optional[int] = None
def metrics_info(self):
# convert cache_config to dict(key: str, value: str) for prometheus
@@ -709,7 +710,8 @@ class TokenizerPoolConfig:
@classmethod
def create_config(
- cls, tokenizer_pool_size: int, tokenizer_pool_type: str,
+ cls, tokenizer_pool_size: int,
+ tokenizer_pool_type: Union[str, Type["BaseTokenizerGroup"]],
tokenizer_pool_extra_config: Optional[Union[str, dict]]
) -> Optional["TokenizerPoolConfig"]:
"""Create a TokenizerPoolConfig from the given parameters.
@@ -1544,7 +1546,7 @@ class LoRAConfig:
max_loras: int
fully_sharded_loras: bool = False
max_cpu_loras: Optional[int] = None
- lora_dtype: Optional[torch.dtype] = None
+ lora_dtype: Optional[Union[torch.dtype, str]] = None
lora_extra_vocab_size: int = 256
# This is a constant.
lora_vocab_padding_size: ClassVar[int] = 256