[Misc] unify variable for LLM instance (#20996)
Signed-off-by: Andy Xie <andy.xning@gmail.com>
This commit is contained in:
@@ -28,10 +28,10 @@ def main(args: Namespace):
|
||||
|
||||
# Create an LLM.
|
||||
# You should pass task="classify" for classification models
|
||||
model = LLM(**vars(args))
|
||||
llm = LLM(**vars(args))
|
||||
|
||||
# Generate logits. The output is a list of ClassificationRequestOutputs.
|
||||
outputs = model.classify(prompts)
|
||||
outputs = llm.classify(prompts)
|
||||
|
||||
# Print the outputs.
|
||||
print("\nGenerated Outputs:\n" + "-" * 60)
|
||||
|
||||
@@ -31,10 +31,10 @@ def main(args: Namespace):
|
||||
|
||||
# Create an LLM.
|
||||
# You should pass task="embed" for embedding models
|
||||
model = LLM(**vars(args))
|
||||
llm = LLM(**vars(args))
|
||||
|
||||
# Generate embedding. The output is a list of EmbeddingRequestOutputs.
|
||||
outputs = model.embed(prompts)
|
||||
outputs = llm.embed(prompts)
|
||||
|
||||
# Print the outputs.
|
||||
print("\nGenerated Outputs:\n" + "-" * 60)
|
||||
|
||||
@@ -27,10 +27,10 @@ def main(args: Namespace):
|
||||
|
||||
# Create an LLM.
|
||||
# You should pass task="score" for cross-encoder models
|
||||
model = LLM(**vars(args))
|
||||
llm = LLM(**vars(args))
|
||||
|
||||
# Generate scores. The output is a list of ScoringRequestOutputs.
|
||||
outputs = model.score(text_1, texts_2)
|
||||
outputs = llm.score(text_1, texts_2)
|
||||
|
||||
# Print the outputs.
|
||||
print("\nGenerated Outputs:\n" + "-" * 60)
|
||||
|
||||
@@ -30,11 +30,11 @@ def main(args: Namespace):
|
||||
|
||||
# Create an LLM.
|
||||
# You should pass task="embed" for embedding models
|
||||
model = LLM(**vars(args))
|
||||
llm = LLM(**vars(args))
|
||||
|
||||
# Generate embedding. The output is a list of EmbeddingRequestOutputs.
|
||||
# Only text matching task is supported for now. See #16120
|
||||
outputs = model.embed(prompts)
|
||||
outputs = llm.embed(prompts)
|
||||
|
||||
# Print the outputs.
|
||||
print("\nGenerated Outputs:")
|
||||
|
||||
@@ -30,10 +30,10 @@ def main(args: Namespace):
|
||||
|
||||
# Create an LLM.
|
||||
# You should pass task="embed" for embedding models
|
||||
model = LLM(**vars(args))
|
||||
llm = LLM(**vars(args))
|
||||
|
||||
# Generate embedding. The output is a list of EmbeddingRequestOutputs.
|
||||
outputs = model.embed(prompts, pooling_params=PoolingParams(dimensions=32))
|
||||
outputs = llm.embed(prompts, pooling_params=PoolingParams(dimensions=32))
|
||||
|
||||
# Print the outputs.
|
||||
print("\nGenerated Outputs:")
|
||||
|
||||
@@ -25,7 +25,7 @@ def config_buckets():
|
||||
os.environ["NEURON_TOKEN_GEN_BUCKETS"] = "128,512,1024,2048"
|
||||
|
||||
|
||||
def initialize_model():
|
||||
def initialize_llm():
|
||||
"""Create an LLM with speculative decoding."""
|
||||
return LLM(
|
||||
model="openlm-research/open_llama_7b",
|
||||
@@ -43,9 +43,9 @@ def initialize_model():
|
||||
)
|
||||
|
||||
|
||||
def process_requests(model: LLM, sampling_params: SamplingParams):
|
||||
def process_requests(llm: LLM, sampling_params: SamplingParams):
|
||||
"""Generate texts from prompts and print them."""
|
||||
outputs = model.generate(prompts, sampling_params)
|
||||
outputs = llm.generate(prompts, sampling_params)
|
||||
for output in outputs:
|
||||
prompt = output.prompt
|
||||
generated_text = output.outputs[0].text
|
||||
@@ -53,12 +53,12 @@ def process_requests(model: LLM, sampling_params: SamplingParams):
|
||||
|
||||
|
||||
def main():
|
||||
"""Main function that sets up the model and processes prompts."""
|
||||
"""Main function that sets up the llm and processes prompts."""
|
||||
config_buckets()
|
||||
model = initialize_model()
|
||||
llm = initialize_llm()
|
||||
# Create a sampling params object.
|
||||
sampling_params = SamplingParams(max_tokens=100, top_k=1)
|
||||
process_requests(model, sampling_params)
|
||||
process_requests(llm, sampling_params)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -140,7 +140,7 @@ datamodule_config = {
|
||||
class PrithviMAE:
|
||||
def __init__(self):
|
||||
print("Initializing PrithviMAE model")
|
||||
self.model = LLM(
|
||||
self.llm = LLM(
|
||||
model=os.path.join(os.path.dirname(__file__), "./model"),
|
||||
skip_tokenizer_init=True,
|
||||
dtype="float32",
|
||||
@@ -158,7 +158,7 @@ class PrithviMAE:
|
||||
|
||||
prompt = {"prompt_token_ids": [1], "multi_modal_data": mm_data}
|
||||
|
||||
outputs = self.model.encode(prompt, use_tqdm=False)
|
||||
outputs = self.llm.encode(prompt, use_tqdm=False)
|
||||
print("################ Inference done (it took seconds) ##############")
|
||||
|
||||
return outputs[0].outputs.data
|
||||
|
||||
@@ -17,13 +17,13 @@ model_name = "Qwen/Qwen3-Reranker-0.6B"
|
||||
# Models converted offline using this method can not only be more efficient
|
||||
# and support the vllm score API, but also make the init parameters more
|
||||
# concise, for example.
|
||||
# model = LLM(model="tomaarsen/Qwen3-Reranker-0.6B-seq-cls", task="score")
|
||||
# llm = LLM(model="tomaarsen/Qwen3-Reranker-0.6B-seq-cls", task="score")
|
||||
|
||||
# If you want to load the official original version, the init parameters are
|
||||
# as follows.
|
||||
|
||||
|
||||
def get_model() -> LLM:
|
||||
def get_llm() -> LLM:
|
||||
"""Initializes and returns the LLM model for Qwen3-Reranker."""
|
||||
return LLM(
|
||||
model=model_name,
|
||||
@@ -77,8 +77,8 @@ def main() -> None:
|
||||
]
|
||||
documents = [document_template.format(doc=doc, suffix=suffix) for doc in documents]
|
||||
|
||||
model = get_model()
|
||||
outputs = model.score(queries, documents)
|
||||
llm = get_llm()
|
||||
outputs = llm.score(queries, documents)
|
||||
|
||||
print("-" * 30)
|
||||
print([output.outputs.score for output in outputs])
|
||||
|
||||
Reference in New Issue
Block a user