[Speculative decoding 6/9] Integrate speculative decoding with LLMEngine (#3894)
This commit is contained in:
@@ -217,7 +217,9 @@ class _AsyncLLMEngine(LLMEngine):
         else:
             output = []

-        return self._process_model_outputs(output, scheduler_outputs)
+        return self._process_model_outputs(
+            output, scheduler_outputs.scheduled_seq_groups,
+            scheduler_outputs.ignored_seq_groups)

     async def encode_request_async(
         self,
Reference in New Issue
Block a user