[Speculative decoding 6/9] Integrate speculative decoding with LLMEngine (#3894)

This commit is contained in:
Cade Daniel
2024-04-16 13:09:21 -07:00
committed by GitHub
parent 69e1d2fb69
commit e95cd87959
31 changed files with 1347 additions and 407 deletions

View File

@@ -125,7 +125,7 @@ def test_same_output_for_single_step():
zero_kv_cache(worker.cache_engine)
set_random_seed(seed)
expected_output = worker.execute_model(
- **single_step_execute_model_data.to_dict(), )
+ **single_step_execute_model_data.to_dict(), )[0]
actual_token_ids = [
output.samples[0].output_token for output in actual_output
@@ -219,7 +219,7 @@ def test_same_output_for_multi_step():
continuations=continuations,
final_seq_lens=final_seq_lens))
- single_step_output.append(
+ single_step_output.extend(
worker.execute_model(**execute_model_data.to_dict(), ))
# Append output tokens to new sequence data.