[V1][Spec Decode] Ngram Spec Decode (#12193)
Signed-off-by: LiuXiaoxuanPKU <lilyliupku@gmail.com>
This commit is contained in:
@@ -77,6 +77,7 @@ def _create_default_sampling_metadata(
|
||||
temperature=torch.full((batch_size, ), 0.0),
|
||||
all_greedy=True,
|
||||
all_random=False,
|
||||
rejection_sampling=False,
|
||||
top_p=torch.empty(batch_size, ),
|
||||
top_k=torch.empty(batch_size, ),
|
||||
no_top_p=True,
|
||||
@@ -88,6 +89,7 @@ def _create_default_sampling_metadata(
|
||||
prompt_token_ids=_create_prompt_tokens_tensor(prompt_token_ids,
|
||||
vocab_size, device),
|
||||
output_token_ids=output_token_ids,
|
||||
spec_token_ids=[],
|
||||
frequency_penalties=_create_penalty_tensor(batch_size, 0.0, device),
|
||||
presence_penalties=_create_penalty_tensor(batch_size, 0.0, device),
|
||||
repetition_penalties=_create_penalty_tensor(batch_size, 1.0, device),
|
||||
|
||||
Reference in New Issue
Block a user