Fix(llm): Abort orphaned requests when llm.chat() batch fails Fixes #26081 (#27420)

Signed-off-by: vensenmu <vensenmu@gmail.com>
This commit is contained in:
Vensen
2025-11-03 00:24:01 +08:00
committed by GitHub
parent 6c317a656e
commit 0ce743f4e1
2 changed files with 75 additions and 14 deletions

View File

@@ -1588,20 +1588,27 @@ class LLM:
tqdm_func = use_tqdm if callable(use_tqdm) else tqdm
it = tqdm_func(it, desc="Adding requests")
for i, prompt in enumerate(it):
if isinstance(prompt, dict):
self._validate_mm_data_and_uuids(
prompt.get("multi_modal_data"), prompt.get("multi_modal_uuids")
)
added_request_ids: list[str] = []
self._add_request(
prompt,
params[i] if isinstance(params, Sequence) else params,
lora_request=lora_request[i]
if isinstance(lora_request, Sequence)
else lora_request,
priority=priority[i] if priority else 0,
)
try:
for i, prompt in enumerate(it):
if isinstance(prompt, dict):
self._validate_mm_data_and_uuids(
prompt.get("multi_modal_data"), prompt.get("multi_modal_uuids")
)
request_id = self._add_request(
prompt,
params[i] if isinstance(params, Sequence) else params,
lora_request=lora_request[i]
if isinstance(lora_request, Sequence)
else lora_request,
priority=priority[i] if priority else 0,
)
added_request_ids.append(request_id)
except Exception as e:
if added_request_ids:
self.llm_engine.abort_request(added_request_ids)
raise e
def _validate_mm_data_and_uuids(
self,
@@ -1684,7 +1691,7 @@ class LLM:
params: SamplingParams | PoolingParams,
lora_request: LoRARequest | None = None,
priority: int = 0,
) -> None:
) -> str:
prompt_text, _, _ = get_prompt_components(prompt)
request_id = str(next(self.request_counter))
@@ -1705,6 +1712,7 @@ class LLM:
priority=priority,
prompt_text=prompt_text,
)
return request_id
def _run_engine(
self, *, use_tqdm: bool | Callable[..., tqdm] = True