[Misc] unify variable for LLM instance (#20996)

Signed-off-by: Andy Xie <andy.xning@gmail.com>
This commit is contained in:
Ning Xie
2025-07-21 19:18:33 +08:00
committed by GitHub
parent e6b90a2805
commit d97841078b
53 changed files with 237 additions and 236 deletions

View File

@@ -28,10 +28,10 @@ def main(args: Namespace):
# Create an LLM.
# You should pass task="classify" for classification models
model = LLM(**vars(args))
llm = LLM(**vars(args))
# Generate logits. The output is a list of ClassificationRequestOutputs.
outputs = model.classify(prompts)
outputs = llm.classify(prompts)
# Print the outputs.
print("\nGenerated Outputs:\n" + "-" * 60)

View File

@@ -31,10 +31,10 @@ def main(args: Namespace):
# Create an LLM.
# You should pass task="embed" for embedding models
model = LLM(**vars(args))
llm = LLM(**vars(args))
# Generate embedding. The output is a list of EmbeddingRequestOutputs.
outputs = model.embed(prompts)
outputs = llm.embed(prompts)
# Print the outputs.
print("\nGenerated Outputs:\n" + "-" * 60)

View File

@@ -27,10 +27,10 @@ def main(args: Namespace):
# Create an LLM.
# You should pass task="score" for cross-encoder models
model = LLM(**vars(args))
llm = LLM(**vars(args))
# Generate scores. The output is a list of ScoringRequestOutputs.
outputs = model.score(text_1, texts_2)
outputs = llm.score(text_1, texts_2)
# Print the outputs.
print("\nGenerated Outputs:\n" + "-" * 60)

View File

@@ -30,11 +30,11 @@ def main(args: Namespace):
# Create an LLM.
# You should pass task="embed" for embedding models
model = LLM(**vars(args))
llm = LLM(**vars(args))
# Generate embedding. The output is a list of EmbeddingRequestOutputs.
# Only text matching task is supported for now. See #16120
outputs = model.embed(prompts)
outputs = llm.embed(prompts)
# Print the outputs.
print("\nGenerated Outputs:")

View File

@@ -30,10 +30,10 @@ def main(args: Namespace):
# Create an LLM.
# You should pass task="embed" for embedding models
model = LLM(**vars(args))
llm = LLM(**vars(args))
# Generate embedding. The output is a list of EmbeddingRequestOutputs.
outputs = model.embed(prompts, pooling_params=PoolingParams(dimensions=32))
outputs = llm.embed(prompts, pooling_params=PoolingParams(dimensions=32))
# Print the outputs.
print("\nGenerated Outputs:")

View File

@@ -25,7 +25,7 @@ def config_buckets():
os.environ["NEURON_TOKEN_GEN_BUCKETS"] = "128,512,1024,2048"
def initialize_model():
def initialize_llm():
"""Create an LLM with speculative decoding."""
return LLM(
model="openlm-research/open_llama_7b",
@@ -43,9 +43,9 @@ def initialize_model():
)
def process_requests(model: LLM, sampling_params: SamplingParams):
def process_requests(llm: LLM, sampling_params: SamplingParams):
"""Generate texts from prompts and print them."""
outputs = model.generate(prompts, sampling_params)
outputs = llm.generate(prompts, sampling_params)
for output in outputs:
prompt = output.prompt
generated_text = output.outputs[0].text
@@ -53,12 +53,12 @@ def process_requests(model: LLM, sampling_params: SamplingParams):
def main():
"""Main function that sets up the model and processes prompts."""
"""Main function that sets up the llm and processes prompts."""
config_buckets()
model = initialize_model()
llm = initialize_llm()
# Create a sampling params object.
sampling_params = SamplingParams(max_tokens=100, top_k=1)
process_requests(model, sampling_params)
process_requests(llm, sampling_params)
if __name__ == "__main__":

View File

@@ -140,7 +140,7 @@ datamodule_config = {
class PrithviMAE:
def __init__(self):
print("Initializing PrithviMAE model")
self.model = LLM(
self.llm = LLM(
model=os.path.join(os.path.dirname(__file__), "./model"),
skip_tokenizer_init=True,
dtype="float32",
@@ -158,7 +158,7 @@ class PrithviMAE:
prompt = {"prompt_token_ids": [1], "multi_modal_data": mm_data}
outputs = self.model.encode(prompt, use_tqdm=False)
outputs = self.llm.encode(prompt, use_tqdm=False)
print("################ Inference done (it took seconds) ##############")
return outputs[0].outputs.data

View File

@@ -17,13 +17,13 @@ model_name = "Qwen/Qwen3-Reranker-0.6B"
# Models converted offline using this method can not only be more efficient
# and support the vllm score API, but also make the init parameters more
# concise, for example.
# model = LLM(model="tomaarsen/Qwen3-Reranker-0.6B-seq-cls", task="score")
# llm = LLM(model="tomaarsen/Qwen3-Reranker-0.6B-seq-cls", task="score")
# If you want to load the official original version, the init parameters are
# as follows.
def get_model() -> LLM:
def get_llm() -> LLM:
"""Initializes and returns the LLM model for Qwen3-Reranker."""
return LLM(
model=model_name,
@@ -77,8 +77,8 @@ def main() -> None:
]
documents = [document_template.format(doc=doc, suffix=suffix) for doc in documents]
model = get_model()
outputs = model.score(queries, documents)
llm = get_llm()
outputs = llm.score(queries, documents)
print("-" * 30)
print([output.outputs.score for output in outputs])