[Mypy] Part 3 fix typing for nested directories for most of directory (#4161)
@@ -4,7 +4,9 @@ from dataclasses import dataclass
 from http import HTTPStatus
 from typing import Dict, List, Optional, Tuple, Union

-from pydantic import conint
+from pydantic import Field
+from transformers import PreTrainedTokenizer, PreTrainedTokenizerFast
+from typing_extensions import Annotated

 from vllm.engine.async_llm_engine import AsyncLLMEngine
 from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
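
Why the import swap helps mypy: `conint(ge=1)` is a call expression, which mypy rejects when used inside a type annotation, while `Annotated[int, Field(ge=1)]` is statically just `int` and Pydantic still enforces the constraint at validation time. A minimal sketch, not part of the diff; the model name is invented:

from pydantic import BaseModel, Field
from typing_extensions import Annotated


class TruncationOptions(BaseModel):  # hypothetical model, for illustration only
    # mypy sees a plain `int`; Pydantic still validates value >= 1
    truncate_prompt_tokens: Annotated[int, Field(ge=1)] = 1


TruncationOptions(truncate_prompt_tokens=16)   # accepted
# TruncationOptions(truncate_prompt_tokens=0)  # raises ValidationError
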
@@ -45,7 +47,8 @@ class OpenAIServing:
             ]

         self.max_model_len = 0
-        self.tokenizer = None
+        # Lazy initialized
+        self.tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast]

         try:
             event_loop = asyncio.get_running_loop()
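
Why the tokenizer attribute becomes a bare annotation: initializing it to `None` makes mypy either infer the attribute as `None` or force an `Optional[...]` annotation, which then demands a `None` check at every use. Declaring the type without a value records what the attribute will hold once it is lazily initialized. A minimal sketch, not part of the diff; the class and method names are invented:

from typing import Union

from transformers import (AutoTokenizer, PreTrainedTokenizer,
                          PreTrainedTokenizerFast)


class TokenizerHolder:  # hypothetical class, for illustration only
    def __init__(self) -> None:
        # Lazy initialized: no value yet, but the eventual type is pinned
        # down, so later uses need no Optional/None handling under mypy.
        self.tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast]

    def load(self, model_name: str) -> None:
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)

    def count_tokens(self, text: str) -> int:
        return len(self.tokenizer.encode(text))
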
@@ -92,7 +95,7 @@ class OpenAIServing:
     def _create_logprobs(
             self,
             token_ids: List[int],
-            top_logprobs: Optional[List[Optional[Dict[int, Logprob]]]] = None,
+            top_logprobs: List[Optional[Dict[int, Logprob]]],
             num_output_top_logprobs: Optional[int] = None,
             initial_text_offset: int = 0,
     ) -> LogProbs:
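
Why `top_logprobs` loses its `Optional[...] = None` default: a required parameter pushes the None handling out of the function body, presumably because the call sites always have a value to pass, so mypy no longer demands an `if top_logprobs is None` branch inside. A minimal sketch of the pattern, not part of the diff:

from typing import Dict, List, Optional


def count_steps_with_logprobs(
        top_logprobs: List[Optional[Dict[int, float]]]) -> int:
    # Individual steps may still be None, but the list itself is always
    # present, so no `if top_logprobs is None` guard is needed.
    return sum(1 for step in top_logprobs if step is not None)


count_steps_with_logprobs([{1: -0.1}, None, {7: -2.3}])  # -> 2
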
@@ -108,6 +111,7 @@ class OpenAIServing:
                 token = self.tokenizer.decode(token_id)
                 logprobs.tokens.append(token)
                 logprobs.token_logprobs.append(None)
+                assert logprobs.top_logprobs is not None
                 logprobs.top_logprobs.append(None)
             else:
                 token_logprob = step_top_logprobs[token_id].logprob
@@ -116,6 +120,7 @@ class OpenAIServing:
                 logprobs.token_logprobs.append(token_logprob)

                 if num_output_top_logprobs:
+                    assert logprobs.top_logprobs is not None
                     logprobs.top_logprobs.append({
                         # Convert float("-inf") to the
                         # JSON-serializable float that OpenAI uses
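
The two `assert logprobs.top_logprobs is not None` insertions are mypy narrowing aids: the attribute is typed `Optional[...]`, so appending to it directly is a typing error, and the assert narrows the type for the lines that follow while also documenting the runtime invariant. A minimal sketch, not part of the diff:

from typing import Dict, List, Optional


def record_step(top_logprobs: Optional[List[Dict[int, float]]],
                step: Dict[int, float]) -> None:
    # Without the assert, mypy flags `.append` on an Optional value.
    assert top_logprobs is not None
    top_logprobs.append(step)  # narrowed to List[Dict[int, float]] here
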
@@ -155,9 +160,9 @@ class OpenAIServing:

     async def _check_model(self, request) -> Optional[ErrorResponse]:
         if request.model in self.served_model_names:
-            return
+            return None
         if request.model in [lora.lora_name for lora in self.lora_requests]:
-            return
+            return None
         return self.create_error_response(
             message=f"The model `{request.model}` does not exist.",
             err_type="NotFoundError",
@@ -165,7 +170,7 @@ class OpenAIServing:

     def _maybe_get_lora(self, request) -> Optional[LoRARequest]:
         if request.model in self.served_model_names:
-            return
+            return None
         for lora in self.lora_requests:
             if request.model == lora.lora_name:
                 return lora
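
The `return` → `return None` changes are behaviour-preserving: both produce `None` at runtime, but the explicit form makes the `Optional[...]` return type visible next to the value-returning paths, and some linters flag implicit `None` returns in functions that also return values. A minimal sketch of the shape, not part of the diff; the helper is invented:

from typing import Optional, Set


def check_model(model: str, served_model_names: Set[str]) -> Optional[str]:
    # hypothetical check mirroring _check_model's structure
    if model in served_model_names:
        return None  # explicit: no error to report
    return f"The model `{model}` does not exist."
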
@@ -177,7 +182,7 @@ class OpenAIServing:
             request: Union[ChatCompletionRequest, CompletionRequest],
             prompt: Optional[str] = None,
             prompt_ids: Optional[List[int]] = None,
-            truncate_prompt_tokens: Optional[conint(ge=1)] = None
+            truncate_prompt_tokens: Optional[Annotated[int, Field(ge=1)]] = None
     ) -> Tuple[List[int], str]:
         if not (prompt or prompt_ids):
             raise ValueError("Either prompt or prompt_ids should be provided.")
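
On this plain method parameter the swap mirrors the import change above: mypy reads `Optional[Annotated[int, Field(ge=1)]]` as `Optional[int]`, and the `ge=1` metadata documents the contract rather than being enforced here; the actual validation happens on the Pydantic request models that supply the value. A minimal sketch, not part of the diff; the function is invented:

from typing import List, Optional

from pydantic import Field
from typing_extensions import Annotated


def truncate_prompt(
        prompt_ids: List[int],
        truncate_prompt_tokens: Optional[Annotated[int, Field(ge=1)]] = None
) -> List[int]:
    # mypy treats the parameter as Optional[int]; the Field metadata is
    # informational at this layer and is not checked on call.
    if truncate_prompt_tokens is None:
        return prompt_ids
    return prompt_ids[-truncate_prompt_tokens:]


truncate_prompt(list(range(10)), truncate_prompt_tokens=4)  # -> [6, 7, 8, 9]
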