[Core] draft_model_runner: Implement prepare_inputs on GPU for advance_step (#6338)

This commit is contained in:
Alexander Matveev
2024-07-17 17:30:28 -04:00
committed by GitHub
parent 5f0b9933e6
commit e76466dde2
12 changed files with 568 additions and 130 deletions

View File

@@ -227,6 +227,7 @@ def get_output_from_llm_generator(
maybe_assert_ngram_worker(llm)
outputs = llm.generate(prompts, sampling_params, use_tqdm=True)
token_ids = [output.outputs[0].token_ids for output in outputs]
tokens = [output.outputs[0].text for output in outputs]