[Speculative decoding 6/9] Integrate speculative decoding with LLMEngine (#3894)

This commit is contained in:
Cade Daniel
2024-04-16 13:09:21 -07:00
committed by GitHub
parent 69e1d2fb69
commit e95cd87959
31 changed files with 1347 additions and 407 deletions

View File

@@ -217,7 +217,9 @@ class _AsyncLLMEngine(LLMEngine):
else:
output = []
-return self._process_model_outputs(output, scheduler_outputs)
+return self._process_model_outputs(
+output, scheduler_outputs.scheduled_seq_groups,
+scheduler_outputs.ignored_seq_groups)
async def encode_request_async(
self,