[Speculative decoding 6/9] Integrate speculative decoding with LLMEngine (#3894)

This commit is contained in:
Cade Daniel
2024-04-16 13:09:21 -07:00
committed by GitHub
parent 69e1d2fb69
commit e95cd87959
31 changed files with 1347 additions and 407 deletions

View File

@@ -40,12 +40,13 @@ class WorkerBase(ABC):
raise NotImplementedError
# Earlier variant of the abstract execute_model declaration shown in this
# hunk: it is annotated to return a single SamplerOutput and its docstring
# describes exactly one model step per call.
# NOTE(review): the hunk header (-40,12 +40,13) suggests these lines are the
# removed side of the diff; confirm against the commit before relying on them.
# NOTE(review): the meaning of the three blocks_to_* mappings is not visible
# here; only their annotated types (int -> int, int -> List[int]) are.
@abstractmethod
def execute_model(self,
seq_group_metadata_list: List[SequenceGroupMetadata],
blocks_to_swap_in: Dict[int, int],
blocks_to_swap_out: Dict[int, int],
blocks_to_copy: Dict[int, List[int]]) -> SamplerOutput:
"""Executes one model step on the given sequences."""
# Later variant of the execute_model declaration in this hunk. Compared with
# the variant above it, the parameters are the same but the return annotation
# is List[SamplerOutput] instead of a single SamplerOutput, and the docstring
# permits more than one model step per call.
# The docstring does not say what is returned when no sequences are provided.
# NOTE(review): presumably the list holds one SamplerOutput per executed step
# (the commit title refers to speculative decoding); confirm against the
# concrete worker implementations.
# NOTE(review): the meaning of the three blocks_to_* mappings is not visible
# here; only their annotated types (int -> int, int -> List[int]) are.
def execute_model(
self, seq_group_metadata_list: List[SequenceGroupMetadata],
blocks_to_swap_in: Dict[int, int], blocks_to_swap_out: Dict[int,
int],
blocks_to_copy: Dict[int, List[int]]) -> List[SamplerOutput]:
"""Executes at least one model step on the given sequences, unless no
sequences are provided."""
# Declaration only: the body unconditionally raises, so any real behaviour
# has to come from an overriding implementation.
raise NotImplementedError
@abstractmethod