[Mypy] Part 3: fix typing for nested directories for most of the directories (#4161)

This commit is contained in:
SangBin Cho
2024-04-23 13:32:44 +09:00
committed by GitHub
parent 34128a697e
commit 0ae11f78ab
29 changed files with 126 additions and 88 deletions

View File

@@ -4,7 +4,9 @@ from dataclasses import dataclass
from http import HTTPStatus
from typing import Dict, List, Optional, Tuple, Union
from pydantic import conint
from pydantic import Field
from transformers import PreTrainedTokenizer, PreTrainedTokenizerFast
from typing_extensions import Annotated
from vllm.engine.async_llm_engine import AsyncLLMEngine
from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
@@ -45,7 +47,8 @@ class OpenAIServing:
]
self.max_model_len = 0
self.tokenizer = None
# Lazy initialized
self.tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast]
try:
event_loop = asyncio.get_running_loop()
@@ -92,7 +95,7 @@ class OpenAIServing:
def _create_logprobs(
self,
token_ids: List[int],
top_logprobs: Optional[List[Optional[Dict[int, Logprob]]]] = None,
top_logprobs: List[Optional[Dict[int, Logprob]]],
num_output_top_logprobs: Optional[int] = None,
initial_text_offset: int = 0,
) -> LogProbs:
@@ -108,6 +111,7 @@ class OpenAIServing:
token = self.tokenizer.decode(token_id)
logprobs.tokens.append(token)
logprobs.token_logprobs.append(None)
assert logprobs.top_logprobs is not None
logprobs.top_logprobs.append(None)
else:
token_logprob = step_top_logprobs[token_id].logprob
@@ -116,6 +120,7 @@ class OpenAIServing:
logprobs.token_logprobs.append(token_logprob)
if num_output_top_logprobs:
assert logprobs.top_logprobs is not None
logprobs.top_logprobs.append({
# Convert float("-inf") to the
# JSON-serializable float that OpenAI uses
@@ -155,9 +160,9 @@ class OpenAIServing:
async def _check_model(self, request) -> Optional[ErrorResponse]:
if request.model in self.served_model_names:
return
return None
if request.model in [lora.lora_name for lora in self.lora_requests]:
return
return None
return self.create_error_response(
message=f"The model `{request.model}` does not exist.",
err_type="NotFoundError",
@@ -165,7 +170,7 @@ class OpenAIServing:
def _maybe_get_lora(self, request) -> Optional[LoRARequest]:
if request.model in self.served_model_names:
return
return None
for lora in self.lora_requests:
if request.model == lora.lora_name:
return lora
@@ -177,7 +182,7 @@ class OpenAIServing:
request: Union[ChatCompletionRequest, CompletionRequest],
prompt: Optional[str] = None,
prompt_ids: Optional[List[int]] = None,
truncate_prompt_tokens: Optional[conint(ge=1)] = None
truncate_prompt_tokens: Optional[Annotated[int, Field(ge=1)]] = None
) -> Tuple[List[int], str]:
if not (prompt or prompt_ids):
raise ValueError("Either prompt or prompt_ids should be provided.")