[Core] draft_model_runner: Implement prepare_inputs on GPU for advance_step (#6338)
This commit is contained in:
committed by
GitHub
parent
5f0b9933e6
commit
e76466dde2
@@ -227,6 +227,7 @@ def get_output_from_llm_generator(
|
||||
maybe_assert_ngram_worker(llm)
|
||||
|
||||
outputs = llm.generate(prompts, sampling_params, use_tqdm=True)
|
||||
|
||||
token_ids = [output.outputs[0].token_ids for output in outputs]
|
||||
tokens = [output.outputs[0].text for output in outputs]
|
||||
|
||||
|
||||
Reference in New Issue
Block a user