[LoRA] Cleanup LoRA unused code (#29611)

Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>
2025-11-29 14:52:58 +08:00
parent 4a80ad0a25
commit 39e63dec7c
46 changed files with 126 additions and 173 deletions
--- a/examples/offline_inference/multilora_inference.py
+++ b/examples/offline_inference/multilora_inference.py
@@ -46,7 +46,6 @@ def create_test_prompts(
                logprobs=1,
                prompt_logprobs=1,
                max_tokens=128,
-                stop_token_ids=[32003],
            ),
            LoRARequest("sql-lora", 1, lora_path),
        ),
@@ -57,7 +56,6 @@ def create_test_prompts(
                logprobs=1,
                prompt_logprobs=1,
                max_tokens=128,
-                stop_token_ids=[32003],
            ),
            LoRARequest("sql-lora2", 2, lora_path),
        ),
@@ -98,7 +96,7 @@ def initialize_engine() -> LLMEngine:
    #   use the same rank, it is recommended to set this as low as possible.
    # max_cpu_loras: controls the size of the CPU LoRA cache.
    engine_args = EngineArgs(
-        model="meta-llama/Llama-2-7b-hf",
+        model="meta-llama/Llama-3.2-3B-Instruct",
        enable_lora=True,
        max_loras=1,
        max_lora_rank=8,
@@ -111,7 +109,7 @@ def initialize_engine() -> LLMEngine:
 def main():
    """Main function that sets up and runs the prompt processing."""
    engine = initialize_engine()
-    lora_path = snapshot_download(repo_id="yard1/llama-2-7b-sql-lora-test")
+    lora_path = snapshot_download(repo_id="jeeejeee/llama32-3b-text2sql-spider")
    test_prompts = create_test_prompts(lora_path)
    process_requests(engine, test_prompts)