[V0 Deprecation] Remove Prompt Adapters (#20588)

Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
Michael Goin
2025-07-23 19:36:48 -04:00
committed by GitHub
parent 78c13e30e1
commit 82ec66f514
60 changed files with 126 additions and 1727 deletions

View File

@@ -60,10 +60,7 @@ class OpenAIServingTokenization(OpenAIServing):
request_id = f"tokn-{self._base_request_id(raw_request)}"
try:
(
lora_request,
prompt_adapter_request,
) = self._maybe_get_adapters(request)
lora_request = self._maybe_get_adapters(request)
tokenizer = await self.engine_client.get_tokenizer(lora_request)
@@ -104,11 +101,8 @@ class OpenAIServingTokenization(OpenAIServing):
self._log_inputs(request_id,
request_prompts[i],
params=None,
lora_request=lora_request,
prompt_adapter_request=prompt_adapter_request)
lora_request=lora_request)
# Silently ignore prompt adapter since it does not affect
# tokenization (Unlike in Embeddings API where an error is raised)
if isinstance(engine_prompt,
dict) and "prompt_token_ids" in engine_prompt:
input_ids.extend(engine_prompt["prompt_token_ids"])
@@ -133,21 +127,14 @@ class OpenAIServingTokenization(OpenAIServing):
request_id = f"tokn-{self._base_request_id(raw_request)}"
(
lora_request,
prompt_adapter_request,
) = self._maybe_get_adapters(request)
lora_request = self._maybe_get_adapters(request)
tokenizer = await self.engine_client.get_tokenizer(lora_request)
self._log_inputs(request_id,
request.tokens,
params=None,
lora_request=lora_request,
prompt_adapter_request=prompt_adapter_request)
# Silently ignore prompt adapter since it does not affect tokenization
# (Unlike in Embeddings API where an error is raised)
lora_request=lora_request)
prompt_input = await self._tokenize_prompt_input_async(
request,