[Deprecation] Remove prompt_token_ids arg fallback in LLM.generate and LLM.embed (#18800)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
2025-08-22 10:56:57 +08:00
parent 19fe1a0510
commit 8896eb72eb
24 changed files with 116 additions and 467 deletions
--- a/examples/offline_inference/spec_decode.py
+++ b/examples/offline_inference/spec_decode.py
@@ -5,6 +5,7 @@ from transformers import AutoTokenizer

 from vllm import LLM, SamplingParams
 from vllm.benchmarks.datasets import add_dataset_parser, get_samples
+from vllm.inputs import TokensPrompt
 from vllm.v1.metrics.reader import Counter, Vector

 try:
@@ -137,7 +138,8 @@ def main():
    sampling_params = SamplingParams(temperature=args.temp, max_tokens=args.output_len)
    if not args.custom_mm_prompts:
        outputs = llm.generate(
-            prompt_token_ids=prompt_ids, sampling_params=sampling_params
+            TokensPrompt(prompt_token_ids=prompt_ids),
+            sampling_params=sampling_params,
        )
    else:
        outputs = llm.chat(prompts, sampling_params=sampling_params)