[Misc] unify variable for LLM instance (#20996)

Signed-off-by: Andy Xie <andy.xning@gmail.com>
2025-07-21 19:18:33 +08:00
parent e6b90a2805
commit d97841078b
53 changed files with 237 additions and 236 deletions
--- a/examples/offline_inference/basic/classify.py
+++ b/examples/offline_inference/basic/classify.py
@@ -28,10 +28,10 @@ def main(args: Namespace):

    # Create an LLM.
    # You should pass task="classify" for classification models
-    model = LLM(**vars(args))
+    llm = LLM(**vars(args))

    # Generate logits. The output is a list of ClassificationRequestOutputs.
-    outputs = model.classify(prompts)
+    outputs = llm.classify(prompts)

    # Print the outputs.
    print("\nGenerated Outputs:\n" + "-" * 60)
--- a/examples/offline_inference/basic/embed.py
+++ b/examples/offline_inference/basic/embed.py
@@ -31,10 +31,10 @@ def main(args: Namespace):

    # Create an LLM.
    # You should pass task="embed" for embedding models
-    model = LLM(**vars(args))
+    llm = LLM(**vars(args))

    # Generate embedding. The output is a list of EmbeddingRequestOutputs.
-    outputs = model.embed(prompts)
+    outputs = llm.embed(prompts)

    # Print the outputs.
    print("\nGenerated Outputs:\n" + "-" * 60)
--- a/examples/offline_inference/basic/score.py
+++ b/examples/offline_inference/basic/score.py
@@ -27,10 +27,10 @@ def main(args: Namespace):

    # Create an LLM.
    # You should pass task="score" for cross-encoder models
-    model = LLM(**vars(args))
+    llm = LLM(**vars(args))

    # Generate scores. The output is a list of ScoringRequestOutputs.
-    outputs = model.score(text_1, texts_2)
+    outputs = llm.score(text_1, texts_2)

    # Print the outputs.
    print("\nGenerated Outputs:\n" + "-" * 60)
--- a/examples/offline_inference/embed_jina_embeddings_v3.py
+++ b/examples/offline_inference/embed_jina_embeddings_v3.py
@@ -30,11 +30,11 @@ def main(args: Namespace):

    # Create an LLM.
    # You should pass task="embed" for embedding models
-    model = LLM(**vars(args))
+    llm = LLM(**vars(args))

    # Generate embedding. The output is a list of EmbeddingRequestOutputs.
    # Only text matching task is supported for now. See #16120
-    outputs = model.embed(prompts)
+    outputs = llm.embed(prompts)

    # Print the outputs.
    print("\nGenerated Outputs:")
--- a/examples/offline_inference/embed_matryoshka_fy.py
+++ b/examples/offline_inference/embed_matryoshka_fy.py
@@ -30,10 +30,10 @@ def main(args: Namespace):

    # Create an LLM.
    # You should pass task="embed" for embedding models
-    model = LLM(**vars(args))
+    llm = LLM(**vars(args))

    # Generate embedding. The output is a list of EmbeddingRequestOutputs.
-    outputs = model.embed(prompts, pooling_params=PoolingParams(dimensions=32))
+    outputs = llm.embed(prompts, pooling_params=PoolingParams(dimensions=32))

    # Print the outputs.
    print("\nGenerated Outputs:")
--- a/examples/offline_inference/neuron_speculation.py
+++ b/examples/offline_inference/neuron_speculation.py
@@ -25,7 +25,7 @@ def config_buckets():
    os.environ["NEURON_TOKEN_GEN_BUCKETS"] = "128,512,1024,2048"


-def initialize_model():
+def initialize_llm():
    """Create an LLM with speculative decoding."""
    return LLM(
        model="openlm-research/open_llama_7b",
@@ -43,9 +43,9 @@ def initialize_model():
    )


-def process_requests(model: LLM, sampling_params: SamplingParams):
+def process_requests(llm: LLM, sampling_params: SamplingParams):
    """Generate texts from prompts and print them."""
-    outputs = model.generate(prompts, sampling_params)
+    outputs = llm.generate(prompts, sampling_params)
    for output in outputs:
        prompt = output.prompt
        generated_text = output.outputs[0].text
@@ -53,12 +53,12 @@ def process_requests(model: LLM, sampling_params: SamplingParams):


 def main():
-    """Main function that sets up the model and processes prompts."""
+    """Main function that sets up the llm and processes prompts."""
    config_buckets()
-    model = initialize_model()
+    llm = initialize_llm()
    # Create a sampling params object.
    sampling_params = SamplingParams(max_tokens=100, top_k=1)
-    process_requests(model, sampling_params)
+    process_requests(llm, sampling_params)


 if __name__ == "__main__":
--- a/examples/offline_inference/prithvi_geospatial_mae.py
+++ b/examples/offline_inference/prithvi_geospatial_mae.py
@@ -140,7 +140,7 @@ datamodule_config = {
 class PrithviMAE:
    def __init__(self):
        print("Initializing PrithviMAE model")
-        self.model = LLM(
+        self.llm = LLM(
            model=os.path.join(os.path.dirname(__file__), "./model"),
            skip_tokenizer_init=True,
            dtype="float32",
@@ -158,7 +158,7 @@ class PrithviMAE:

        prompt = {"prompt_token_ids": [1], "multi_modal_data": mm_data}

-        outputs = self.model.encode(prompt, use_tqdm=False)
+        outputs = self.llm.encode(prompt, use_tqdm=False)
        print("################ Inference done (it took seconds)  ##############")

        return outputs[0].outputs.data
--- a/examples/offline_inference/qwen3_reranker.py
+++ b/examples/offline_inference/qwen3_reranker.py
@@ -17,13 +17,13 @@ model_name = "Qwen/Qwen3-Reranker-0.6B"
 # Models converted offline using this method can not only be more efficient
 # and support the vllm score API, but also make the init parameters more
 # concise, for example.
-# model = LLM(model="tomaarsen/Qwen3-Reranker-0.6B-seq-cls", task="score")
+# llm = LLM(model="tomaarsen/Qwen3-Reranker-0.6B-seq-cls", task="score")

 # If you want to load the official original version, the init parameters are
 # as follows.


-def get_model() -> LLM:
+def get_llm() -> LLM:
    """Initializes and returns the LLM model for Qwen3-Reranker."""
    return LLM(
        model=model_name,
@@ -77,8 +77,8 @@ def main() -> None:
    ]
    documents = [document_template.format(doc=doc, suffix=suffix) for doc in documents]

-    model = get_model()
-    outputs = model.score(queries, documents)
+    llm = get_llm()
+    outputs = llm.score(queries, documents)

    print("-" * 30)
    print([output.outputs.score for output in outputs])