[Spec Decode] Introduce DraftModelRunner (#5799)

This commit is contained in:
Cody Yu
2024-06-28 09:17:51 -07:00
committed by GitHub
parent b90d8cd832
commit b2c620230a
15 changed files with 257 additions and 36 deletions

View File

@@ -880,6 +880,8 @@ class ExecuteModelRequest:
running_queue_size: int = 0
# Optional hidden states from prior step.
previous_hidden_states: Optional[HiddenStates] = None
# The number of forward steps to run.
num_steps: int = 1
def clone(
self, seq_group_metadata_list: List[SequenceGroupMetadata]
@@ -893,4 +895,5 @@ class ExecuteModelRequest:
num_lookahead_slots=self.num_lookahead_slots,
running_queue_size=self.running_queue_size,
previous_hidden_states=self.previous_hidden_states,
num_steps=self.num_steps,
)