[Model] Jamba support (#4115)

Signed-off-by: Muralidhar Andoorveedu <muralidhar.andoorveedu@centml.ai>
Co-authored-by: Erez Schwartz <erezs@ai21.com>
Co-authored-by: Mor Zusman <morz@ai21.com>
Co-authored-by: tomeras91 <57313761+tomeras91@users.noreply.github.com>
Co-authored-by: Tomer Asida <tomera@ai21.com>
Co-authored-by: Zhuohan Li <zhuohan123@gmail.com>
Co-authored-by: Muralidhar Andoorveedu <muralidhar.andoorveedu@centml.ai>
This commit is contained in:
Mor Zusman
2024-07-03 02:11:29 +03:00
committed by GitHub
parent ee93f4f92a
commit 9d6a8daa87
21 changed files with 1192 additions and 34 deletions

View File

@@ -224,6 +224,8 @@ class _AsyncLLMEngine(LLMEngine):
"""
seq_group_metadata_list, scheduler_outputs = self.scheduler[
virtual_engine].schedule()
+        finished_requests_ids = self.scheduler[
+            virtual_engine].get_and_reset_finished_requests_ids()
if not scheduler_outputs.is_empty():
# Execute the model.
@@ -235,7 +237,7 @@ class _AsyncLLMEngine(LLMEngine):
virtual_engine=virtual_engine,
num_lookahead_slots=scheduler_outputs.num_lookahead_slots,
running_queue_size=scheduler_outputs.running_queue_size,
-        )
+            finished_requests_ids=finished_requests_ids)
output = await self.model_executor.execute_model_async(
execute_model_req)
else: